By: Marlon Teodosio
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import requests
import matplotlib.pyplot as plt
import pandas_profiling as pp
from io import BytesIO, StringIO
print("Last update:", pd.to_datetime('today').strftime("%b %d, %Y %H:%M:%S"))
import plotly.express as px
import plotly.io as pio
pio.templates
import plotly.graph_objects as go
from plotly.subplots import make_subplots
pio.templates.default = 'plotly_white'
import plotly.offline as py
py.init_notebook_mode()
from IPython.display import HTML
HTML('''<script>
function code_toggle() {
if (code_shown){
$('div.input').hide('500');
$('#toggleButton').val('Show Code')
} else {
$('div.input').show('500');
$('#toggleButton').val('Hide Code')
}
code_shown = !code_shown
}
$( document ).ready(function(){
code_shown=false;
$('div.input').hide()
});
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')
The data is extracted from the official DOH COVID-19 DataDrop.
#########################################################################
########################## 05 Case Information ##########################
#########################################################################
data_src = 'https://drive.google.com/open?id=1fed36fegqVEQM-h3WhskR0oCELI2zQ73' # Get shareable link from DOH Data Drop
file_id = data_src.split('=')[1]
dwn_url='https://drive.google.com/uc?export=download&id=' + file_id
url = requests.get(dwn_url).text
csv_raw = StringIO(url)
covid_ph = pd.read_csv(csv_raw, encoding = 'utf-8')
for col in covid_ph.columns:
if 'Date' in col:
covid_ph[col] = pd.to_datetime(covid_ph[col], dayfirst=True)
covid_ph['CityMunRes'] = covid_ph['CityMunRes'].str.replace(u'Ã\x91', 'Ñ')
covid_ph['CityMunRes'] = covid_ph['CityMunRes'].str.replace(u'Ã\x83±', 'Ñ')
covid_ph['RemovalType'] = covid_ph['RemovalType'].fillna('Active')
############################## CHECKER #################################
# covid_ph[covid_ph.RegionRes == 'NCR'].groupby('CityMunRes')['CaseCode'].count().sort_values(ascending=False).index.tolist()
print("Case Information Data as of:", covid_ph.DateRepConf.max().date().strftime("%b %d"))
# save the latest file
covid_ph.to_excel("./data/covid_ph_database.xlsx")
########################################################################
######################### 07 Testing Aggregate #########################
########################################################################
url = 'https://drive.google.com/open?id=1cXfNiC7RoshSfW4FoBMEOB3VAu9qMrcS'
file_id = url.split('=')[1]
dwn_url = 'https://drive.google.com/uc?export=download&id=' + file_id
url = requests.get(dwn_url).text
csv_raw = StringIO(url)
df_tests = pd.read_csv(csv_raw)
# format date
df_tests['report_date'] = pd.to_datetime(df_tests['report_date'], dayfirst=True)
# change NAN & "-" to 0 (considering that these are equivalent to N/A or no value)
# df_tests['REMAINING AVAILABLE TESTS'] = df_tests['REMAINING AVAILABLE TESTS'].fillna("0")
# df_tests['remaining_available_tests'] = df_tests['remaining_available_tests'].str.replace("-", "0")
# # format count columns
# cols = [col for col in df_tests.columns if 'INDIVIDUALS' in col]
# for col in cols:
# df_tests[col] = df_tests[col].str.replace(',', '').astype(int)
# df_tests['TOTAL TESTS CONDUCTED'] = df_tests['TOTAL TESTS CONDUCTED'].str.replace(',', '').astype(int)
# df_tests['remaining_available_tests'] = df_tests['remaining_available_tests'].str.replace(',', '').astype(int)
# df_tests.tail()
print("Facility Testing Data as of:", df_tests.report_date.max().date().strftime("%b %d"))
df_tests.to_excel("./data/testingaggregates.xlsx")
########################################################################
################## 05 DOH Data Collect - Daily Report ##################
########################################################################
url = 'https://drive.google.com/open?id=1ocaAEwyVDU7rlYbU0D-xv13tQwEAJ5h4'
file_id = url.split('=')[1]
dwn_url='https://drive.google.com/uc?export=download&id=' + file_id
url = requests.get(dwn_url).text
csv_raw = StringIO(url)
df_hosp = pd.read_csv(csv_raw)
df_hosp['reportdate'] = pd.to_datetime(df_hosp['reportdate'])
print("Hospital, Bed & MechVent Data as of:", df_hosp.reportdate.max().date().strftime("%b %d"))
# HOSPITAL - PPE
# url = 'https://drive.google.com/open?id=1b3NGhxEn59dubEnDHxf7F1taYfxkUHn1'
# file_id = url.split('=')[1]
# dwn_url='https://drive.google.com/uc?export=download&id=' + file_id
# url = requests.get(dwn_url).text
# csv_raw = StringIO(url)
# df_ppe = pd.read_csv(csv_raw)
# df_ppe.head(5)
total_cases = len(covid_ph)
recovered = len(covid_ph.loc[covid_ph.RemovalType=='Recovered'])
died = len(covid_ph.loc[covid_ph.RemovalType=='Died'])
active_cases = total_cases - recovered - died
percent_active = active_cases / total_cases
percent_recovered = recovered / total_cases
percent_died = died / total_cases
asof = pd.to_datetime(covid_ph.DateRepConf.unique().max()) # latest available data
new = len(covid_ph[covid_ph.DateRepConf == asof])
cases_daily = (covid_ph.groupby('DateRepConf')['CaseCode']
.count().resample('1D').sum())
# recoveries_daily = (covid_ph.loc[df.RemovalType=='Recovered']
# .groupby('DateRecover')['CaseCode'].count().resample('1D').sum())
recoveries_daily = (covid_ph.loc[covid_ph.RemovalType=='Recovered']
.groupby('DateRepRem')['CaseCode'].count().resample('1D').sum())
# deaths_daily = (covid_ph.loc[df.RemovalType=='Died']
# .groupby('DateDied')['CaseCode'].count().resample('1D').sum())
deaths_daily = (covid_ph.loc[covid_ph.RemovalType=='Died']
.groupby('DateRepRem')['CaseCode'].count().resample('1D').sum())
cases_daily.to_excel("./data/cases_daily.xlsx")
recoveries_daily.to_excel("./data/recoveries_daily.xlsx")
deaths_daily.to_excel("./data/deaths_daily.xlsx")
data = pd.DataFrame(data=[[active_cases, percent_active, f"{100*percent_active:.1f}%"],
[died, percent_died, f"{100*percent_died:.1f}%"],
[recovered, percent_recovered, f"{100*percent_recovered:.1f}%"]],
index=['Active', 'Died', 'Recovered'],
columns=['count', 'percent', 'pct']).reset_index()
data['cum_percent'] = data.percent.cumsum()
data['new'] = pd.Series([cases_daily[-1]-deaths_daily[-1]-recoveries_daily[-1], deaths_daily[-1], recoveries_daily[-1]])
# Display the Dashboard
print('\033[1m' + '\033[94m' + "PH COVID-19 Tracker" + '\033[0m')
print("As of", asof.date().strftime("%b %d, %Y"))
fig = px.bar(data, x='percent', orientation='h', color='index',
text='count', hover_name='pct', height=250,
labels={'percent': " ", 'index':'PatientType'})
fig.update_layout(legend=dict(orientation='h', x=-0.01, y=-0.1, title="", font_size=14),
title=("Total Confirmed Cases: " + f"<b>{total_cases}</b>"),
hovermode=False)
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=16))
for i in range(0, len(data.percent.unique())):
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=data.cum_percent.unique()[i]-0.001, y=0.6,
text=data.pct[i]), font_size=12,
font_color = px.colors.qualitative.Plotly[i])
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=data.cum_percent.unique()[i]-0.001, y=-0.7,
text="(+"+str(data.new[i])+")"), font_size=14,
font_color = px.colors.qualitative.Plotly[i])
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=0.18, y=-0.7,
text='New Cases: (+'+str(cases_daily[-1])+')', font_size=14))
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()
covid_ph1 = covid_ph.copy()
covid_ph1['RemovalType'] = covid_ph1.RemovalType.fillna('Active')
data = covid_ph1[covid_ph1.RemovalType == 'Active'].groupby('HealthStatus').CaseCode.count().reset_index().rename({'CaseCode': 'Count'}, axis=1)
data['Percent'] = data['Count']/data['Count'].sum()*100
data['Pct'] = round(data['Percent'], 1).astype(str)+'%'
data['Cum_percent'] = data.Percent.cumsum()
# a = pd.Series(['Critical', 'Severe', 'Mild', 'Asymptomatic']).reset_index().rename({0: 'HealthStatus'}, axis=1)
a = pd.Series(['Asymptomatic', 'Mild', 'Severe', 'Critical']).reset_index().rename({0: 'HealthStatus'}, axis=1)
data1 = pd.merge(data, a, how='left', on='HealthStatus')
data1 = data1.sort_values(by='index', ascending=False)
data1['Count_percent'] = data['Count'].astype(str) + " (" + data1['Pct'] + ")"
fig = px.bar(data1, x='Count', y='HealthStatus', orientation='h',
text='Count', hover_name='Pct', height=400)
fig.update_layout(title=("Health Status of Active Cases"), showlegend=False,
hoverlabel=dict(font_size=17, bgcolor='blue'), hovermode='y', margin=dict(l=200))
fig.update_traces(textposition='outside', textfont_size=15)
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False, title=None,
range=[0, data1.Count.sum()])
fig.update_yaxes(title=None, tickfont_size=15)
fig.show()
topcity = covid_ph[covid_ph.RegionRes == 'NCR'].groupby('CityMunRes')['CaseCode'].count().sort_values(ascending=False).index.tolist()
# topcity.remove('For Validation')
# topcity.append('For Validation')
topcity
data01 = pd.DataFrame()
for i in list(reversed(topcity)):
df = covid_ph[covid_ph.CityMunRes == i].groupby('RemovalType').count()['CaseCode'].reset_index()
df = df.sort_values('RemovalType')
df['City'] = i
df = df.rename({'CaseCode': 'Count0', 'RemovalType': 'Type'}, axis=1)
df['Percent'] = df['Count0'] / df['Count0'].sum()*100
df['Pct'] = round(df['Percent'], 1).astype(str)+'%'
data01 = data01.append(df)
data01['Count'] = data01.Count0.astype(int).apply(lambda x : "{:,}".format(x))
cases_ncr = covid_ph[covid_ph.RegionRes == 'NCR'].shape[0]
fig = px.bar(data01, x='Percent', orientation='h', color='Type',
title="Total Confirmed Cases in <b>NCR</b>: " + f"<b>{cases_ncr}</b>" + " (" + str(round(cases_ncr/total_cases*100,1)) + "% of Total)",
text='Count', hover_name='Count', height=800)
fig.update_layout(legend=dict(orientation='h', x=0.52, y=1, title="", font_size=12), hoverlabel=dict(font_size=20))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=8))
for i in range(0, len(data01.City.unique())):
# City
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=0, y=i,
text=data01.City.unique()[i] + " "*15), font_size=15)
# Death Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=104, y=i,
text=data01[(data01.Type == 'Died') & (data01.City == list(reversed(topcity))[i])]['Pct'].values[0]),
font_size=14, font_color = px.colors.qualitative.Plotly[1])
# Recovery Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=119, y=i,
text=data01[(data01.Type == 'Recovered') & (data01.City == list(reversed(topcity))[i])]['Pct'].values[0]),
font_size=14, font_color = px.colors.qualitative.Plotly[2])
# Total Confirmed Cases
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=0, y=i,
text="{:,}".format(data01[data01.City == list(reversed(topcity))[i]].Count0.sum())+" "*3),
font_size=14)
# Death Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=103, y=17,
text="Fatality<br>Rate"),
font_size=11, font_color = px.colors.qualitative.Plotly[1])
# Recovery Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=118, y=17,
text="Recovery<br>Rate"),
font_size=11, font_color = px.colors.qualitative.Plotly[2])
# Total Confirmed Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=-1, y=17,
text="Confirmed<br>Cases"),
font_size=11)
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=8, title=None)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()
a1 = data01.pivot(index='City', columns='Type', values='Percent').reset_index()
a2 = data01[['City', 'Count0']].groupby('City').sum().reset_index()
data01_1 = pd.merge(a1, a2, how='left', on='City')
data01_1.rename({'Active': 'Active Rate (%)',
'Died': 'Fatality Rate (%)',
'Recovered': 'Recovery Rate (%)',
'Count0': 'Case Count'}, axis=1, inplace=True)
data01_1['City'] = data01_1['City'].str.replace('CITY OF ', '')
data01_1['Active Rate (%)'] = round(data01_1['Active Rate (%)'], 1)
data01_1['Fatality Rate (%)'] = round(data01_1['Fatality Rate (%)'], 1)
data01_1['Recovery Rate (%)'] = round(data01_1['Recovery Rate (%)'], 1)
figgg = px.scatter(data01_1, x='Recovery Rate (%)', y='Fatality Rate (%)',
title='COVID-19 Status in NCR Cities',
size='Case Count', text='City', size_max=40)
figgg.update_layout(hoverlabel=dict(font_size=20), font_size=11,
legend=dict(orientation="v", title="", font_size=12,
x=0.01, y=0.01, yanchor="bottom"))
figgg.update_xaxes(range=(0, round(data01_1['Recovery Rate (%)'].max()*1.1)), titlefont_size=16)
figgg.update_yaxes(range=(0, round(data01_1['Fatality Rate (%)'].max()*1.1)), titlefont_size=16)
figgg.add_scatter(x=[round(percent_recovered*100,1)]*len(list(range(0, round(data01_1['Fatality Rate (%)'].max()*1.1)+1))),
y=list(range(0, round(data01_1['Fatality Rate (%)'].max()*1.1)+1)),
line=dict(color=px.colors.qualitative.Plotly[2], dash='dash'),
mode='lines', name='Total PH Recovery Rate', hoverinfo='skip')
figgg.add_scatter(y=[round(percent_died*100,1)]*len(list(range(0, round(data01_1['Recovery Rate (%)'].max()*1.1)+1))),
x=list(range(0, round(data01_1['Recovery Rate (%)'].max()*1.1)+1)),
line=dict(color=px.colors.qualitative.Plotly[1], dash='dash'),
mode='lines', name='Total PH Fatality Rate', hoverinfo='skip')
figgg.show()
base = pd.DataFrame(covid_ph.DateRepConf.sort_values().unique()).rename({0: 'Date'}, axis=1)
c = cases_daily.reset_index().rename({'DateRepConf': 'Date', 'CaseCode': 'Count'}, axis=1)
c = pd.merge(base, c, how='left', on='Date')
c['Type'] = 'Reported Confirmed Cases'
c.Count.fillna(0)
c['Cum_count'] = c.Count.cumsum()
c['Percent_change'] = c.Count.pct_change()*100
c['%Change'] = c.Cum_count.pct_change()*100
d = deaths_daily.reset_index().rename({'DateRepRem': 'Date', 'CaseCode': 'Count'}, axis=1)
d = pd.merge(base, d, how='left', on='Date')
d['Type'] = 'Reported Deaths'
#d = deaths_daily.reset_index().rename({'DateDied': 'Date', 'CaseCode': 'Count'}, axis=1)
#d['Type'] = 'Number of Actual Deaths'
d.Count = d.Count.fillna(0)
d['Cum_count'] = d.Count.cumsum()
d['Percent_change'] = d.Count.pct_change()*100
d['%Change'] = d.Cum_count.pct_change()*100
r = recoveries_daily.reset_index().rename({'DateRepRem': 'Date', 'CaseCode': 'Count'}, axis=1)
r = pd.merge(base, r, how='left', on='Date')
r['Type'] = 'Reported Recoveries'
#r = recoveries_daily.reset_index().rename({'DateRecover': 'Date', 'CaseCode': 'Count'}, axis=1)
#r['Type'] = 'Number of Actual Recoveries'
r.Count = r.Count.fillna(0)
r['Cum_count'] = r.Count.cumsum()
r['Percent_change'] = r.Count.pct_change()*100
r['%Change'] = r.Cum_count.pct_change()*100
import statsmodels.api as sm
cycle, trend1 = sm.tsa.filters.hpfilter(c['Count'], 7)
c['trend_HP'] = trend1
cycle, trend2 = sm.tsa.filters.hpfilter(d['Count'], 7)
d['trend_HP'] = trend2
cycle, trend3 = sm.tsa.filters.hpfilter(r['Count'], 7)
r['trend_HP'] = trend3
c['trend_7MA'] = c['Count'].rolling(7).mean()
d['trend_7MA'] = d['Count'].rolling(7).mean()
r['trend_7MA'] = r['Count'].rolling(7).mean()
c['trend_7MA'] = c['trend_7MA'].fillna(" ")
dr = pd.concat([r,d])
dr['trend_7MA_name'] = 'Trend (7-day MA)'
dr['trend_HP_name'] = 'Trend (HP Filter)'
d['trend_7MA_name'] = 'Trend (7-day MA)'
d['trend_HP_name'] = 'Trend (HP Filter)'
r['trend_7MA_name'] = 'Trend (7-day MA)'
r['trend_HP_name'] = 'Trend (HP Filter)'
c['trend_7MA_name'] = 'Trend (7-day MA)'
c['trend_HP_name'] = 'Trend (HP Filter)'
fig1 = px.bar(c, x='Date', y='Count', color='Type',
color_discrete_sequence=["lightblue"])
fig2 = px.line(c, x='Date', y='trend_7MA', color='trend_7MA_name', line_shape='spline',
color_discrete_sequence=['blue'])
# fig3 = px.line(c, x='Date', y='trend_HP', color='trend_HP_name', line_shape='spline',
# color_discrete_sequence=['lightblue'])
fig1.add_trace(fig2.data[0])
# fig1.add_trace(fig3.data[0].update(line={'dash': 'dash'}))
fig1.update_yaxes(title="Case Count", tickfont_size=10)
fig1.update_layout(title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
xaxis=dict(tickmode='array', dtick=0,
tickfont_size=9, title="Date"),
legend=dict(orientation="v", title="",
x=0.05, y=0.75, yanchor="bottom"),
hoverlabel=dict(font_size=20))
fig1.show()
fig1 = px.bar(d, x='Date', y='Count', color='Type',
color_discrete_sequence=["lightpink"])
fig2 = px.line(d, x='Date', y='trend_7MA', color='trend_7MA_name', line_shape='spline',
color_discrete_sequence=['tomato'])
# fig3 = px.line(c, x='Date', y='trend_HP', color='trend_HP_name', line_shape='spline',
# color_discrete_sequence=['pink'])
fig1.add_trace(fig2.data[0])
# fig1.add_trace(fig3.data[0].update(line={'dash': 'dash'}))
fig1.update_yaxes(title="Case Count", tickfont_size=10)
fig1.update_layout(title=dict(x=0.5, text="PH Covid-19 Daily Death Cases"),
xaxis=dict(tickmode='array', dtick=0,
tickfont_size=9, title="Date"),
legend=dict(orientation="v", title="",
x=0.05, y=0.75, yanchor="bottom"),
hoverlabel=dict(font_size=20))
fig1.show()
fig1 = px.bar(r, x='Date', y='Count', color='Type',
color_discrete_sequence=["lightgreen"])
fig2 = px.line(r, x='Date', y='trend_7MA', color='trend_7MA_name', line_shape='spline',
color_discrete_sequence=['green'])
# fig3 = px.line(c, x='Date', y='trend_HP', color='trend_HP_name', line_shape='spline',
# color_discrete_sequence=['pink'])
fig1.add_trace(fig2.data[0])
# fig1.add_trace(fig3.data[0].update(line={'dash': 'dash'}))
fig1.update_yaxes(title="Case Count", tickfont_size=10)
fig1.update_layout(title=dict(x=0.5, text="PH Covid-19 Daily Recovered Cases"),
xaxis=dict(tickmode='array', dtick=0,
tickfont_size=9, title="Date"),
legend=dict(orientation="v", title="",
x=0.05, y=0.75, yanchor="bottom"),
hoverlabel=dict(font_size=20))
fig1.show()
rc = np.maximum(cases_daily - recoveries_daily - deaths_daily, 0).fillna(0).reset_index()
rc.rename({'CaseCode': 'Count', 'index': 'Date'}, axis=1, inplace=True)
rc['Cum_count'] = rc.Count.cumsum()
rc['Type'] = 'Reported Remaining Active Cases'
cc = cases_daily.reset_index()
cc.rename({'CaseCode': 'Count', 'DateRepConf': 'Date'}, axis=1, inplace=True)
cc['Cum_count'] = cc.Count.cumsum()
cc['Type'] = 'Reported Confirmed Cases'
# Figure 1
fig1 = px.bar(cc, x='Date', y='Cum_count', color='Type',
color_discrete_sequence=['lightblue'])
# Figure 1.1
fig2 = px.line(rc, x='Date', y='Cum_count', color='Type',
color_discrete_sequence=["blue"])
fig1.add_trace(fig2.data[0])
fig1.update_yaxes(title="Case Count", tickfont_size=10)
fig1.update_layout(title=dict(x=0.5, text="PH Covid-19 Cumulative Number of Cases"),
xaxis=dict(tickmode='array', dtick=0, tickfont_size=9, title="Date"),
legend=dict(orientation="v", title="",
x=0.05, y=0.8, yanchor="bottom"),
hoverlabel=dict(font_size=20), hovermode="x")
fig1.show()
# Figure 2
fig2 = px.line(dr, x='Date', y='Cum_count', color='Type', line_shape='spline',
color_discrete_sequence=['limegreen', 'tomato'])
fig2.update_yaxes(title="Case Count", showgrid=True, tickfont_size=10)
fig2.update_layout(title_text="PH Covid-19 Cumulative Number of Cases")
fig2.update_layout(title=dict(x=0.5),
xaxis=dict(tickmode='array', showgrid=False, #tick0=0,
dtick=0, tickfont_size=9, title="Date"),
legend=dict(orientation="v", title="",
x=0.05, y=0.8, yanchor="bottom"),
hoverlabel=dict(font_size=20), hovermode='x')
fig2.show()
cc_ecq = cc[cc.Date >= '2020-03-15'].reset_index(drop=True).reset_index()
dr_ecq = dr[dr.Date >= '2020-03-15']
# Figure 1
fig1 = px.line(cc_ecq, x='Date', y='Cum_count', color='Type', log_y=True,
color_discrete_sequence=['blue'])
fig1.update_yaxes(title="log(Case Count)", tickfont_size=10)
fig1.update_layout(title=dict(x=0.5, text="PH Covid-19 Cumulative Number of Cases (in Log Scale)"),
xaxis=dict(tickmode='array', showgrid=False, dtick=0, tickfont_size=9, title="Day"),
legend=dict(orientation="v", title="",
x=0.05, y=0.8, yanchor="bottom"),
hoverlabel=dict(font_size=20), hovermode="x")
# fig1.update_layout(shapes=[dict(type= 'line',
# yref= 'y', y0=1, y1=10000,
# xref= 'x', x0=1, x1=50,
# line=dict(color="green", width=1, dash="dashdot"))])
fig1.show()
# Figure 2
fig2 = px.line(dr_ecq, x='Date', y='Cum_count',
color='Type',
line_shape='spline', log_y = True,
color_discrete_sequence=['limegreen', 'tomato'])
fig2.update_yaxes(title="log(Case Count)", showgrid=True, tickfont_size=10)
fig2.update_layout(title_text="PH Covid-19 Cumulative Number of Cases (in Log Scale)")
fig2.update_layout(title=dict(x=0.5),
xaxis=dict(tickmode='array', showgrid=False, #tick0=0,
dtick=0, tickfont_size=9, title="Date"),
legend=dict(orientation="v", title="",
x=0.05, y=0.8, yanchor="bottom"),
hoverlabel=dict(font_size=20), hovermode='x')
fig2.show()
print("Note: Day 0 is March 15 when ECQ was implemented & hit 140 confirmed cases.")
# # Create figure with secondary y-axis
# fig = make_subplots(specs=[[{"secondary_y": True}]])
# # Figure 1
# fig1 = px.line(dr, x='Date', y='Cum_count', color='Type', line_shape='spline',
# color_discrete_sequence=['limegreen', 'tomato'])
# # Figure 2
# fig2 = px.bar(c, x='Date', y='Cum_count', color='Type',
# color_discrete_sequence=["lightblue"])
# fig.add_trace(fig2.data[0], secondary_y=False)
# fig.add_trace(fig1.data[0], secondary_y=True)
# fig.add_trace(fig1.data[1], secondary_y=True)
# fig.update_yaxes(range=[0, dr.Cum_count.max()*1.5],
# title="Deaths / Recoveries", tickfont_size=10, secondary_y=True)
# fig.update_yaxes(title="Active Cases", showgrid=False, tickfont_size=10, secondary_y=False)
# fig.update_layout(title_text="PH Covid-19 Cumulative Number of Cases")
# fig.update_layout(title=dict(x=0.5),
# xaxis=dict(tickmode='linear', #tick0=0,
# dtick=0, tickfont_size=8, title="Date"),
# legend=dict(orientation="v", title="",
# x=0.02, y=0.7, yanchor="bottom"),
# hovermode='x')
# fig.show()
# Compute trend based on Hodrick-Prescott Filter
import statsmodels.api as sm
c1 = c[c.Date >= '2020-03-15']
d1 = d[d.Date >= '2020-03-15']
r1 = r[r.Date >= '2020-03-15']
c1 = c1.reset_index(drop=True)
cycle, trend = sm.tsa.filters.hpfilter(c1['%Change'], 7)
c1['trend'] = trend
d1 = d1.reset_index(drop=True)
cycle, trend = sm.tsa.filters.hpfilter(d1['%Change'], 7)
d1['trend'] = trend
r1 = r1.reset_index(drop=True)
cycle, trend = sm.tsa.filters.hpfilter(r1['%Change'], 7)
r1['trend'] = trend
print('-------------------------------- Confirmed Cases --------------------------------')
print('1-day Percent Growth:', str(round(c1['%Change'].tail(1).values[0],1))+'%')
print('7-day Average Percent Growth:', str(round(c1['%Change'].tail(7).mean(),1))+'%')
print('30-day Average Percent Growth:', str(round(c1['%Change'].tail(30).mean(),1))+'%')
# Figure 1
c1['trend_name'] = 'Trend (HP Filter)'
fig_trend = px.line(c1, x='Date', y='trend', color='trend_name',
color_discrete_sequence=["cadetblue"])
fig2 = px.line(c1, x='Date', y='%Change', color='Type',
color_discrete_sequence=["cadetblue"])
fig2.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}))
fig2.update_yaxes(title="%Change", showgrid=False, tickfont_size=10)
fig2.update_layout(title=dict(x=0.5, text="%Change on Daily Confirmed Cases"),
xaxis=dict(tickmode='array', #tick0=4, dtick=0,
tickfont_size=9, title="Date", showgrid=False),
legend=dict(orientation="v", title="",
x=0.75, y=0.85, yanchor="bottom"),
hoverlabel=dict(font_size=20))
# fig2.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
# xanchor='left', yanchor='middle', ax=0, ay=0,
# x=pd.to_datetime(c1.Date.unique().max()),
# y=round(c1['trend'][-1:].values[0],1),
# text=" "*2 + round(c1['trend'][-1:].values[0],1).astype('str')+'%',
# font_size=14, font_color='cadetblue'))
fig2.show()
print('--------------------------------Reported Deaths--------------------------------')
print('1-day Percent Growth:', str(round(d1['%Change'].tail(1).values[0],1))+'%')
print('7-day Average Percent Growth:', str(round(d1['%Change'].tail(7).mean(),1))+'%')
print('30-day Average Percent Growth:', str(round(d1['%Change'].tail(30).mean(),1))+'%')
# Figure 2
d1['trend_name'] = 'Trend (HP Filter)'
fig_trend = px.line(d1, x='Date', y='trend', color='trend_name', color_discrete_sequence=["tomato"])
fig2 = px.line(d1, x='Date', y='%Change', color='Type',
color_discrete_sequence=["tomato"])
fig2.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}))
fig2.update_yaxes(title="%Change", showgrid=False, tickfont_size=10)
fig2.update_layout(title=dict(x=0.5, text="%Change on Daily Reported Deaths"),
xaxis=dict(tickmode='array', #tick0=4, dtick=0,
tickfont_size=9, title="Date", showgrid=False),
legend=dict(orientation="v", title="",
x=0.75, y=0.85, yanchor="bottom"),
hoverlabel=dict(font_size=20))
# fig2.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
# xanchor='left', yanchor='middle', ax=0, ay=0,
# x=pd.to_datetime(d1.Date.unique().max()),
# y=round(d1['trend'][-1:].values[0],1),
# text=" "*2 + round(d1['trend'][-1:].values[0],1).astype('str')+'%',
# font_size=14, font_color='tomato'))
fig2.show()
print('-------------------------------- Reported Recoveries --------------------------------')
print('1-day Percent Growth:', str(round(r1['%Change'].tail(1).values[0],1))+'%')
print('7-day Average Percent Growth:', str(round(r1['%Change'].tail(7).mean(),1))+'%')
print('30-day Average Percent Growth:', str(round(r1['%Change'].tail(30).mean(),1))+'%')
# Figure 3
r1['trend_name'] = 'Trend (HP Filter)'
fig_trend = px.line(r1, x='Date', y='trend', color='trend_name', color_discrete_sequence=["limegreen"])
fig2 = px.line(r1, x='Date', y='%Change', color='Type',
color_discrete_sequence=["limegreen"])
fig2.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}))
fig2.update_yaxes(title="%Change", showgrid=False, tickfont_size=10)
fig2.update_layout(title=dict(x=0.5, text="%Change on Daily Reported Recoveries"),
xaxis=dict(tickmode='array', #tick0=4, dtick=0,
tickfont_size=9, title="Date", showgrid=False),
legend=dict(orientation="v", title="",
x=0.75, y=0.85, yanchor="bottom"),
hoverlabel=dict(font_size=20))
# fig2.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
# xanchor='left', yanchor='middle', ax=0, ay=0,
# x=pd.to_datetime(d1.Date.unique().max()),
# y=round(r1['trend'][-1:].values[0],1),
# text=" "*2 + round(r1['trend'][-1:].values[0],1).astype('str')+'%',
# font_size=14, font_color='limegreen'))
fig2.show()
active_daily = np.maximum(cases_daily - recoveries_daily - deaths_daily, 0).fillna(0)
active_rate_daily = (active_daily.cumsum() / cases_daily.cumsum())*100
death_rate_daily = (deaths_daily.cumsum() / cases_daily.cumsum())*100
recovery_rate_daily = (recoveries_daily.cumsum() / cases_daily.cumsum())*100
# a1 = active_rate_daily.reset_index().rename({'index': 'Date', 'CaseCode': 'Rate (%)'}, axis=1)
# a1['Type'] = 'Active Rate'
# d1 = death_rate_daily.reset_index().rename({'index': 'Date', 'CaseCode': 'Rate (%)'}, axis=1)
# d1['Type'] = 'Death Rate'
# r1 = recovery_rate_daily.reset_index().rename({'index': 'Date', 'CaseCode': 'Rate (%)'}, axis=1)
# r1['Type'] = 'Recovery Rate'
# adr1 = pd.concat([a1, d1, r1])
# # March 16 - start of ECQ
# a2 = a1[46:]
# d2 = d1[46:]
# r2 = r1[46:]
# adr2 = pd.concat([a2, d2, r2])
# fig = px.bar(adr2, x='Date', y='Rate (%)', color='Type')
# fig.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False)
# fig.update_yaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False)
# #fig.update_layout(title_text="PH Covid-19 Cumulative Number of Cases")
# fig.update_layout(title=dict(x=0.5),
# xaxis=dict(tickmode='linear', #tick0=0,
# dtick=0, tickfont_size=8, title="Date"),
# legend=dict(orientation="h", title=" ",
# x=-0.02, y=1, yanchor="bottom"))
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
# xanchor='left', yanchor='bottom', ax=0, ay=20,
# x=pd.to_datetime(adr2.Date.unique().min()), y=0,
# text='Start of ECQ'), font_size=11)
# fig.show()
aaa1 = active_rate_daily.reset_index().rename({'index': 'Date', 'CaseCode': 'Rate (%)'}, axis=1)
aaa1['Type'] = 'Active Rate'
ddd1 = death_rate_daily.reset_index().rename({'index': 'Date', 'CaseCode': 'Rate (%)'}, axis=1)
ddd1['Type'] = 'Death Rate'
rrr1 = recovery_rate_daily.reset_index().rename({'index': 'Date', 'CaseCode': 'Rate (%)'}, axis=1)
rrr1['Type'] = 'Recovery Rate'
adr1 = pd.concat([aaa1, ddd1, rrr1])
# March 16 - start of ECQ
aaa2 = aaa1[46:]
ddd2 = ddd1[46:]
rrr2 = rrr1[46:]
adr2 = pd.concat([aaa2, ddd2, rrr2])
fig = px.line(adr2, x='Date', y='Rate (%)', color='Type')
fig.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=True, showgrid=True, zeroline=False, showline=False)
# fig.update_layout(title_text="PH Covid-19 Cumulative Number of Cases")
fig.update_layout(title=dict(x=0.5), showlegend=True,
xaxis=dict(tickmode='array', #tick0=0,
dtick=0, tickfont_size=8, title="Date"),
yaxis=dict(tickfont_size=10),
legend=dict(orientation="h", title=" ",
x=-0.02, y=1, yanchor="bottom"),
hoverlabel=dict(font_size=20))
fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
xanchor='left', yanchor='bottom', ax=0, ay=20,
x=pd.to_datetime(adr2.Date.unique().min()), y=0,
text='Start of ECQ'), font_size=11)
# Latest Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
xanchor='left', yanchor='middle', ax=0, ay=0,
x=pd.to_datetime(adr2.Date.unique().max()),
y=aaa2['Rate (%)'][-1:].values[0],
text=" "*2 + round(aaa2['Rate (%)'][-1:].values[0],1).astype('str')+'%',
font_size=15, font_color=px.colors.qualitative.Plotly[0]))
fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
xanchor='left', yanchor='middle', ax=0, ay=0,
x=pd.to_datetime(adr2.Date.unique().max()),
y=ddd2['Rate (%)'][-1:].values[0],
text=" "*2 + round(ddd2['Rate (%)'][-1:].values[0],1).astype('str')+'%',
font_size=15, font_color=px.colors.qualitative.Plotly[1]))
fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
xanchor='left', yanchor='middle', ax=0, ay=0,
x=pd.to_datetime(adr2.Date.unique().max()),
y=rrr2['Rate (%)'][-1:].values[0],
text=" "*2 + round(rrr2['Rate (%)'][-1:].values[0],1).astype('str')+'%',
font_size=15, font_color=px.colors.qualitative.Plotly[2]))
# Legend name
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
# xanchor='right', yanchor='middle', ax=0, ay=0,
# x=pd.to_datetime(adr2.Date.unique().min()),
# y=a2['Rate (%)'][:1].values[0],
# text="Active Rate"+" ",
# font_size=14))
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
# xanchor='right', yanchor='middle', ax=0, ay=0,
# x=pd.to_datetime(adr2.Date.unique().min()),
# y=d2['Rate (%)'][:1].values[0],
# text="Death Rate"+" ",
# font_size=14))
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
# xanchor='right', yanchor='middle', ax=0, ay=0,
# x=pd.to_datetime(adr2.Date.unique().min()),
# y=r2['Rate (%)'][:1].values[0],
# text="Recovery Rate"+" ",
# font_size=14))
fig.show()
# Figure 0
data = covid_ph.groupby('Sex')['CaseCode'].count().reset_index().sort_values('CaseCode', ascending=False).rename({'CaseCode':'Count'}, axis=1).reset_index(drop=True)
data['Percent'] = (data.Count / data.Count.sum())*100
data['Pct'] = round(data['Percent'],1).astype('str') + '%'
fig = px.bar(data, x='Percent', orientation='h', color='Sex',
text='Count', hover_name='Pct', height=250, labels={'Percent': ''})
fig.update_layout(legend=dict(orientation='h', title=""), hovermode=False, font_size=14,
title=("Total Confirmed Cases: " + f"<b>{total_cases}</b>"))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=20))
#for i in range(0, len(data.Percent.unique())):
for i in [0, 1]:
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=data.Percent.cumsum().unique()[i]-0.001, y=0.6,
text=data.Pct[i]), font_size=16,
font_color = px.colors.qualitative.Plotly[i])
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()
# Figure 1 & 2
data1 = covid_ph.groupby(['Sex','DateRepConf'])['CaseCode'].count().reset_index().rename({'CaseCode':'Count'}, axis=1)
date_f = active_daily.index.to_frame().reset_index(drop='True').rename({0: 'DateRepConf'}, axis=1)
date_f['Sex'] = 'Female'
data_f = pd.merge(date_f, data1[data1.Sex == 'Female'].iloc[:,1:], how='left', on='DateRepConf').fillna(0)
data_f['Cum_count'] = data_f.Count.cumsum()
date_m = active_daily.index.to_frame().reset_index(drop='True').rename({0: 'DateRepConf'}, axis=1)
date_m['Sex'] = 'Male'
data_m = pd.merge(date_m, data1[data1.Sex == 'Male'].iloc[:,1:], how='left', on='DateRepConf').fillna(0)
data_m['Cum_count'] = data_m.Count.cumsum()
data2 = pd.concat([data_m, data_f]).reset_index(drop=True)
data2['Percent'] = data2.groupby('DateRepConf')['Cum_count'].apply(lambda x: 100 * x / float(x.sum())).values
data3 = data2[data2.DateRepConf > '2020-03-15'] # Filter start ECQ
fig1 = px.line(data3, x='DateRepConf', y='Cum_count', color='Sex')
fig2 = px.bar(data3, x='DateRepConf', y='Percent', color='Sex')
fig000 = make_subplots(rows=1, cols=2, subplot_titles=("Cumulative Count by Sex", "Cumulative Sex Ratio"))
for loop in range(0, len(data.Sex.unique())):
fig000.add_trace(fig1.data[loop], row=1, col=1)
fig000.add_trace(fig2.data[loop], row=1, col=2)
fig000.update_traces(showlegend=True, row=1, col=1)
fig000.update_traces(showlegend=False, row=1, col=2)
fig000.update_layout(barmode='stack',
xaxis1=dict(tickmode='array', dtick=0, tickfont_size=8, title="Date"),
xaxis2=dict(tickmode='array', dtick=0, tickfont_size=8, title="Date"),
yaxis1=dict(tickfont_size=8), yaxis2=dict(tickfont_size=8))
# fig000.update_xaxes(showticklabels=False, showgrid=False,
# zeroline=False, showline=False)
# fig000.update_yaxes(showticklabels=False, showgrid=False,
# zeroline=False, showline=False, row=1, col=2)
fig000.update_layout(title=dict(x=0.5), showlegend=False, height=400,
legend=dict(orientation="h", x=-0.05, y=-0.4,
yanchor="bottom"), hoverlabel=dict(font_size=20))
fig000.show()
# Figure 3 & 4
data2 = pd.concat([data_m, data_f]).reset_index(drop=True)
data2['Percent'] = data2.groupby('DateRepConf')['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data3 = data2[data2.DateRepConf > '2020-03-15'] # Filter start ECQ
fig1 = px.line(data3, x='DateRepConf', y='Count', color='Sex')
fig2 = px.bar(data3, x='DateRepConf', y='Percent', color='Sex')
fig000 = make_subplots(rows=1, cols=2, subplot_titles=("Daily Case Count by Sex", "Daily Case Count Sex Ratio"))
for loop in range(0, len(data.Sex.unique())):
fig000.add_trace(fig1.data[loop], row=1, col=1)
fig000.add_trace(fig2.data[loop], row=1, col=2)
fig000.update_traces(showlegend=True, row=1, col=1)
fig000.update_traces(showlegend=False, row=1, col=2)
fig000.update_layout(barmode='stack',
xaxis1=dict(tickmode='array', dtick=0, tickfont_size=8, title="Date"),
xaxis2=dict(tickmode='array', dtick=0, tickfont_size=8, title="Date"),
yaxis1=dict(tickfont_size=8), yaxis2=dict(tickfont_size=8))
# fig000.update_xaxes(showticklabels=False, showgrid=False,
# zeroline=False, showline=False)
# fig000.update_yaxes(showticklabels=False, showgrid=False,
# zeroline=False, showline=False, row=1, col=2)
fig000.update_layout(title=dict(x=0.5), showlegend=False, height=400,
legend=dict(orientation="h", x=-0.05, y=-0.4,
yanchor="bottom"), hoverlabel=dict(font_size=20))
fig000.show()
print("Note: There are", covid_ph.Sex.isnull().sum(), "cases with no sex information")
# Figure 01
data01 = covid_ph.groupby(['Sex','RemovalType'])['CaseCode'].count().reset_index().sort_values('CaseCode', ascending=False).rename({'CaseCode':'Count'}, axis=1)
data01['Percent'] = data01.groupby('RemovalType')['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data01['Pct'] = round(data01['Percent'],1).astype('str') + '%'
fig = px.bar(data01, x='Percent', orientation='h', color='Sex',
text='Count', hover_name='Pct', hover_data=None, height=350)
fig.update_layout(legend=dict(orientation='h', x=0.09, y=0, title=""), hoverlabel=dict(font_size=20))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=18))
for i in range(0, len(data01.RemovalType.unique())):
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=0, y=i,
text=data01.RemovalType.unique()[i] + " "), font_size=15)
fig.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=8, title=None)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()
# df = covid_ph.fillna("no info")
age = covid_ph.groupby(['AgeGroup','Sex'])['CaseCode'].count().reset_index().rename({'CaseCode': 'Count'},axis=1)
age['AgeGroup'] = age.AgeGroup.str.replace('5 to 9', '05 to 09', case=True, regex=False)
age1 = age.sort_values(['Sex','AgeGroup'], ascending=[False, True]).reset_index(drop='True')
fig1 = px.bar(age1, x='AgeGroup', y='Count', color='Sex', barmode='group',
title='Age Group Distribution by Sex')
age2 = age1.groupby('AgeGroup').sum().reset_index()
age2['Total'] = 'Total Cases'
fig2 = px.line(age2, x='AgeGroup', y='Count', color='Total', line_shape='spline')
fig2.data[0].update(mode='markers+lines')
fig2.update_traces(line_color='lightgreen', showlegend=True, text="Count")
fig1.add_trace(fig2.data[0])
fig1.update_layout(legend=dict(orientation="h", title="",
x=0.65, y=0.9,
yanchor="bottom"), hoverlabel=dict(font_size=20))
fig1.show()
pcnt = round((covid_ph.AgeGroup.isnull().sum() / covid_ph.shape[0])*100,2).astype(str) + "%"
print("*Note: There are", covid_ph.AgeGroup.isnull().sum(), "(", pcnt,")", "cases with no age information")
# df = covid_ph.copy()
# df['RemovalType'] = df.RemovalType.fillna('Active')
# age_type = df.groupby(['AgeGroup','RemovalType'])['CaseCode'].count().reset_index().rename({'CaseCode': 'Count'}, axis=1)
# age_type['AgeGroup'] = age_type.AgeGroup.str.replace('5 to 9', '05 to 09', case=True, regex=False)
# age_type1 = age_type.sort_values(['AgeGroup', 'RemovalType'], ascending=True).reset_index(drop='True')
# age_type1['Percent'] = age_type1.groupby(['RemovalType'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
# age_type1
# age_sex_type = df.groupby(['AgeGroup', 'Sex', 'RemovalType'])['CaseCode'].count().reset_index().rename({'CaseCode': 'Count'}, axis=1)
# age_sex_type['AgeGroup'] = age_sex_type.AgeGroup.str.replace('5 to 9', '05 to 09', case=True, regex=False)
# age_sex_type1 = age_sex_type.sort_values(['AgeGroup', 'Sex', 'RemovalType'], ascending=True).reset_index(drop='True')
# age_sex_type1['Percent'] = age_sex_type1.groupby(['Sex','RemovalType'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
# age_sex_type1.sort_values(['RemovalType','AgeGroup','Sex'], ascending=[True, True, False], inplace=True)
# fig1 = px.line(age_type1, x='AgeGroup', y='Percent', color='RemovalType',
# line_shape="spline", hover_name='Count',
# labels={'RemovalType': "PatientType"},
# title='Age Group Distribution by Patient Type')
# # fig1 = px.bar(age_type1, x='AgeGroup', y='Percent', color='RemovalType',
# # barmode='group', labels={'RemovalType': " "},
# # title='Age Distribution by Patient Type')
# for i in [0,1,2]:
# fig1.data[i].update(mode='markers+lines')
# fig1.update_layout(legend=dict(orientation="h", title="",
# x=0.03, y=0.9,
# yanchor="bottom"), hovermode='x')
# fig1.show()
# # fig000 = make_subplots(rows=1, cols=2, subplot_titles=("Male", "Female"))
# # figM = px.bar(age_sex_type1[age_sex_type1.Sex == 'Male'],
# # x='AgeGroup', y='Percent', color='RemovalType',
# # barmode='group', labels={'RemovalType': " "})
# # figF = px.bar(age_sex_type1[age_sex_type1.Sex == 'Female'],
# # x='AgeGroup', y='Percent', color='RemovalType',
# # barmode='group', labels={'RemovalType': " "})
# # for loop in range(0, len(age_sex_type1.RemovalType.unique())):
# # fig000.add_trace(figM.data[loop], row=1, col=1)
# # fig000.add_trace(figF.data[loop], row=1, col=2)
# # fig000.update_traces(showlegend=True, row=1, col=1)
# # fig000.update_traces(showlegend=False, row=1, col=2)
# # fig000.update_layout(legend=dict(orientation="h", title="",
# # x=-0.05, y=-0.5,
# # yanchor="bottom"), height=400)
# # fig000.show()
df_recovered = covid_ph.loc[(covid_ph.RemovalType=='Recovered')].reset_index(drop=True)
df_died = covid_ph.loc[(covid_ph.RemovalType=='Died')].reset_index(drop=True)
df = pd.concat([df_recovered, df_died])
fig = px.histogram(df, x='Age', color='RemovalType', opacity=0.8, nbins=20,
marginal='box', barmode='overlay',
labels={"RemovalType": "PatientType"},
title='Age Distribution of Recovered vs Died Patients')
fig.update_layout(legend=dict(orientation="h", title="",
x=0.03, y=0.65, yanchor="bottom"),
xaxis=dict(tickmode='linear', dtick=10))
fig.add_annotation(dict(xref="x", yref="paper", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=df_recovered.Age.median()+1, y=0.86,
text=int(df_recovered.Age.median()), font_size=11,
font_color = px.colors.qualitative.Plotly[0]))
fig.add_annotation(dict(xref="x", yref="paper", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=df_died.Age.median()+1, y=0.99,
text=int(df_died.Age.median()), font_size=11,
font_color = px.colors.qualitative.Plotly[1]))
fig.show()
df_recovered = covid_ph.loc[(covid_ph.RemovalType=='Recovered')].reset_index(drop=True)
df_died = covid_ph.loc[(covid_ph.RemovalType=='Died')].reset_index(drop=True)
df = pd.concat([df_recovered, df_died])
# Recovered Patients
fig1 = px.histogram(df_recovered, x='Age', color='Sex', opacity=0.8,
marginal='box', barmode='overlay',
labels={"RemovalType": "PatientType"},
title='Age Distribution of Recovered Patients')
fig1.update_layout(legend=dict(orientation="h", title="", x=0.03, y=0.65, yanchor="bottom"),
xaxis=dict(tickmode='linear', dtick=10))
fig1.add_annotation(dict(xref="x", yref="paper", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=df_recovered[df_recovered.Sex == 'Male'].Age.median()+1, y=0.86,
text=int(df_recovered[df_recovered.Sex == 'Male'].Age.median()), font_size=11,
font_color = px.colors.qualitative.Plotly[0]))
fig1.add_annotation(dict(xref="x", yref="paper", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=df_recovered[df_recovered.Sex == 'Female'].Age.median()+1, y=0.99,
text=int(df_recovered[df_recovered.Sex == 'Female'].Age.median()), font_size=11,
font_color = px.colors.qualitative.Plotly[1]))
fig1.show()
fig2 = px.histogram(df_died, x='Age', color='Sex', opacity=0.8,
marginal='box', barmode='overlay',
labels={"RemovalType": "PatientType"},
title='Age Distribution of Died Patients')
fig2.update_layout(legend=dict(orientation="h", title="", x=0.03, y=0.65, yanchor="bottom"),
xaxis=dict(tickmode='linear', dtick=10))
fig2.add_annotation(dict(xref="x", yref="paper", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=df_died[df_died.Sex == 'Male'].Age.median()+1, y=0.86,
text=int(df_died[df_died.Sex == 'Male'].Age.median()), font_size=11,
font_color = px.colors.qualitative.Plotly[0]))
fig2.add_annotation(dict(xref="x", yref="paper", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=df_died[df_died.Sex == 'Female'].Age.median()+1, y=0.99,
text=int(df_died[df_died.Sex == 'Female'].Age.median()), font_size=11,
font_color = px.colors.qualitative.Plotly[1]))
fig2.show()
df_regres = covid_ph.groupby('ProvRes').count()['CaseCode'].sort_values(ascending=False).reset_index()
df_regres['PH'] = 'Philippines'
df_regres.rename({'CaseCode': 'Count'}, inplace=True, axis=1)
df_regres['Percentage'] = round((df_regres.Count / df_regres.Count.sum())*100,1).astype('str')+'%'
df_regres = df_regres[df_regres.ProvRes != 'For Validation']
df_regres1 = covid_ph.groupby(['ProvRes', 'CityMunRes']).count()['CaseCode'].sort_values(ascending=False).reset_index()
df_regres1['PH'] = 'Philippines'
df_regres1.rename({'CaseCode': 'Count'}, inplace=True, axis=1)
df_regres1['Percentage'] = round((df_regres1.Count / df_regres1.Count.sum())*100,1).astype('str')+'%'
df_regres1 = df_regres1[df_regres1.ProvRes != 'For Validation']
df_regres1 = df_regres1[df_regres1.CityMunRes != 'For Validation']
top = 10
fig1 = px.bar(df_regres.head(top), x="Count", y="ProvRes",
orientation='h', hover_name='Percentage',
text="Count",
title="Provinces with Most Cases")
fig1.update_traces(textposition='outside', textfont_size=12)
fig1.update_layout(title=dict(x=0.5), yaxis=dict(categoryorder='total ascending'),
showlegend = False, legend_orientation="v")
fig2 = px.bar(df_regres1.head(top), x="Count", y="CityMunRes",
orientation='h', hover_name='Percentage',
text="Count",
title="City/Municipalities with Most Cases")
fig2.update_traces(textposition='outside', textfont_size=12)
fig2.update_layout(title=dict(x=0.5), yaxis=dict(categoryorder='total ascending'),
showlegend = False, legend_orientation="v")
#fig1.show()
#fig2.show()
fig000 = make_subplots(rows=1, cols=2, subplot_titles=("Provinces with Most Cases", "Cities with Most Cases"))
fig000.add_trace(fig1.data[0], row=1, col=1)
fig000.add_trace(fig2.data[0], row=1, col=2)
fig000.update_layout(yaxis1=dict(categoryorder='total ascending'), yaxis2=dict(categoryorder='total ascending'), height=500)
fig000.update_xaxes(range=(0, round(max(df_regres.Count) + max(df_regres.Count)/4)), row=1, col=1)
fig000.update_xaxes(range=(0, round(max(df_regres1.Count) + max(df_regres1.Count)/4)), row=1, col=2)
fig000.update_layout(xaxis={"domain": [0.05, 0.40]}, xaxis2={"domain": [0.6, 1]},
hoverlabel=dict(font_size=15))
fig000.show()
z1 = covid_ph[covid_ph.RegionRes.isnull()].shape[0]
z2 = covid_ph[covid_ph.ProvRes == 'For Validation'].shape[0]
print('Note:')
print('1. There are', z1, 'cases no information about residence.')
print('2. There are', z2, '('+str(round(z2/covid_ph.shape[0],1)*100)+'%'+')' , 'cases with region but no specific province/city of residence (tagged as "For Validation").')
fig = px.treemap(df_regres,
path=['PH','ProvRes'],
values='Count',
hover_name='Percentage',
title='Case Count by Province of Residence',
branchvalues='total', height=700)
fig.update_layout(font_size=14)
fig.update_layout(hoverlabel=dict(font_size=15))
fig.show()
fig1 = px.treemap(df_regres1,
path=['PH','ProvRes','CityMunRes'],
values='Count',
hover_name='Percentage',
title='Case Count by City/Municipality of Residence',
branchvalues='total', height=700)
#fig1.data[0].textinfo = 'label+text+value'
#fig1.layout.hovermode = False
fig1.update_layout(font_size=14)
fig1.update_layout(hoverlabel=dict(font_size=14))
fig1.show()
a = covid_ph.RegionRes.isnull().sum()
b = round(((a / covid_ph.shape[0])*100),1).astype(str) +'%'
import requests
import json
# PHILIPPINES
geojson = json.loads(requests.get("https://raw.githubusercontent.com/macoymejia/geojsonph/master/Province/Provinces.minimal.json").text)
# NCR
geojson1 = json.loads(requests.get("https://raw.githubusercontent.com/macoymejia/geojsonph/master/Philippines/Luzon/Metropolitant%20Manila/MetropolitantManila.json").text)
x = []
for i in range(0, len(geojson['features'])):
z = list(geojson['features'][i].values())[1]['PROVINCE']
x.append(z)
p = pd.DataFrame(x).rename({0: 'Province'}, axis=1)
df_regres0 = df_regres.copy()
df_regres0['ProvRes'] = df_regres0['ProvRes'].str.title()
df_regres0['ProvRes'] = df_regres0['ProvRes'].replace({'Metro Manila': 'Metropolitan Manila',
'Davao Del Sur': 'Davao del Sur',
'Zamboanga Del Sur': 'Zamboanga del Sur',
'Davao Del Norte': 'Davao del Norte',
'Lanao Del Sur': 'Lanao del Sur',
'Lanao Del Norte': 'Lanao del Norte',
'Agusan Del Norte': 'Agusan del Norte',
'Cotabato': 'North Cotabato',
'Cebu Province': 'Cebu',
'Iloilo Province': 'Iloilo',
'Tarlac Province': 'Tarlac'})
df_regres0 = df_regres0.reset_index(drop=True)
# CHECKER
prov = pd.merge(p, df_regres0, how='outer', left_on='Province', right_on='ProvRes')
prov['Count_log'] = np.log10(prov['Count'])
prov['Count_log'] = prov['Count_log'].fillna(0)
prov['Count'] = prov['Count'].fillna(0)
# prov.tail(10)
Geo Mapping
fig = px.choropleth(prov, geojson=geojson, color="Count_log",
locations="Province", featureidkey="properties.PROVINCE",
hover_name='Count',
projection="mercator",
labels={'Count_log': 'log(Count)'},
title='Number of Covid-19 Confirmed Cases in the Philippines (as of ' + str(asof.date().strftime("%b %d, %Y")) + ")" ,
color_continuous_scale=px.colors.sequential.OrRd,
height=1100)
fig.update_layout(hoverlabel=dict(font_size=20, bgcolor='white', bordercolor='red'))
fig.update_geos(fitbounds="locations", visible=False)
fig.show()
df_ncr = df_regres1[df_regres1['ProvRes'] == 'METRO MANILA'].reset_index(drop=True)
df_ncr['CityMunRes'] = df_ncr['CityMunRes'].replace({'QUEZON CITY': 'Quezon City',
'CITY OF MANILA': 'Manila',
'CITY OF PARAÑAQUE': 'Parañaque',
'CITY OF MAKATI': 'Makati City',
'CITY OF MANDALUYONG': 'Mandaluyong',
'CITY OF PASIG': 'Pasig City',
'TAGUIG CITY': 'Taguig',
'CITY OF SAN JUAN': 'San Juan',
'CALOOCAN CITY': 'Kalookan City',
'PASAY CITY': 'Pasay City',
'CITY OF LAS PIÑAS': 'Las Piñas',
'CITY OF MUNTINLUPA': 'Muntinlupa',
'CITY OF MARIKINA': 'Marikina',
'CITY OF VALENZUELA': 'Valenzuela',
'CITY OF MALABON': 'Malabon',
'CITY OF NAVOTAS': 'Navotas',
'PATEROS': 'Pateros'})
# CHECKER
z = []
for i in range(0, len(geojson1['features'])):
y = list(geojson1['features'][i].values())[1]['NAME_2']
z.append(y)
p = pd.DataFrame(z).rename({0: 'City'}, axis=1)
p
check = pd.merge(df_ncr, p, how='left', left_on='CityMunRes', right_on='City')
# check
fig = px.choropleth(df_ncr, geojson=geojson1, color="Count",
locations="CityMunRes", featureidkey="properties.NAME_2",
hover_name='Count',
projection="mercator",
labels={'CityMunRes': 'City'},
title='Number of Covid-19 Confirmed Cases by NCR Cities (as of ' + str(asof.date().strftime("%b %d, %Y")) + ")" ,
color_continuous_scale=px.colors.sequential.OrRd,
height=1100)
fig.update_layout(hoverlabel=dict(font_size=20, bgcolor='white', bordercolor='red'))
fig.update_geos(fitbounds="locations", visible=False)
fig.show()
print("As of", asof.date().strftime("%b %d, %Y"))
top = 10
topreg = df_regres.head(top).ProvRes.tolist()
df_regres_daily = covid_ph.groupby(['ProvRes', 'DateRepConf']).count()['CaseCode'].sort_values(ascending=False).reset_index()
df_regres_daily.sort_values(by=['ProvRes','DateRepConf'], inplace=True)
#df_regres_daily['ProvRes'] = df_regres_daily['ProvRes'].replace({'NCR': 'Metropolitan Manila'})
#df_regres_daily['Cum_count'] = df_regres_daily.groupby('ProvRes')['CaseCode'].cumsum()
df_regres_daily = df_regres_daily[df_regres_daily.ProvRes != 'For Validation']
df_regres_daily1 = pd.DataFrame()
for i in topreg:
a = df_regres_daily[df_regres_daily.ProvRes == i].rename({'CaseCode': 'Count'}, axis=1)
base = pd.DataFrame(covid_ph.DateRepConf.sort_values().unique())
base.rename({0: 'DateRepConf'}, axis=1, inplace=True)
# base = base[base.DateRepConf >= '2020-03-06']
q = pd.merge(base, a, how='left', on='DateRepConf')
q['ProvRes'] = i
q['Count'] = q.Count.fillna(0)
q['Cum_count'] = q.Count.cumsum()
# q['Percent_change'] = q.Cum_count.pct_change()*100
df_regres_daily1 = df_regres_daily1.append(q)
df_regres_daily1
# Figure 1
fig0 = px.line(df_regres_daily1, x='DateRepConf', y='Cum_count',
color='ProvRes', labels={'Cum_count': 'Cumulative Count'},
title="Cumulative Case Count on Location with Most Cases")
fig0.update_layout(showlegend=True)
a = pd.DataFrame(topreg).rename({0: 'RegRes'}, axis=1)
# for i in topreg[:2]:
# yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
# fig0.add_annotation(dict(xref="x", yref="y", showarrow=False,
# arrowhead=0, xanchor='left', yanchor='middle',
# ax=0, ay=0,
# x=df_regres_daily1.DateRepConf.iloc[-1],
# y=yyy,
# text=" "*1 + str(yyy.astype('int')),
# font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
# fig0.add_annotation(dict(xref="x", yref="y", showarrow=False,
# arrowhead=0, xanchor='left', yanchor='middle',
# ax=0, ay=0,
# x=df_regres_daily1.DateRepConf.iloc[-1],
# y=yyy,
# text=" "*10 + i, #PLEASE EDIT IF NEEDED
# font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
fig0.show()
# Figure 2
fig1 = px.line(df_regres_daily1[~df_regres_daily1.ProvRes.isin(topreg[:2])], x='DateRepConf', y='Cum_count',
color='ProvRes', labels={'Cum_count': 'Cumulative Count'},
title="Cumulative Case Count on Location with Most Cases (excluding NCR & Cebu Province)",
color_discrete_sequence=px.colors.qualitative.Plotly[2:])
fig1.update_layout(showlegend=False)
a = pd.DataFrame(topreg).rename({0: 'RegRes'}, axis=1)
# for i in topreg[1:2]:
# yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
# fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
# arrowhead=0, xanchor='left', yanchor='middle',
# ax=0, ay=0,
# x=df_regres_daily1.DateRepConf.iloc[-1],
# y=yyy+5,
# text=" "*1 + str(yyy.astype('int')),
# font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
# fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
# arrowhead=0, xanchor='left', yanchor='middle',
# ax=0, ay=0,
# x=df_regres_daily1.DateRepConf.iloc[-1],
# y=yyy+5,
# text=" "*8 + i,
# font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
for i in topreg[2:5]:
yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=df_regres_daily1.DateRepConf.iloc[-1],
y=yyy,
text=" "*1 + str(yyy.astype('int')),
font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=df_regres_daily1.DateRepConf.iloc[-1],
y=yyy,
text=" "*8 + i,
font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
for i in topreg[5:6]:
yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=df_regres_daily1.DateRepConf.iloc[-1],
y=yyy,
text=" "*1 + str(yyy.astype('int')),
font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=df_regres_daily1.DateRepConf.iloc[-1],
y=yyy,
text=" "*8 + i,
font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
for i in topreg[6:7]:
yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=df_regres_daily1.DateRepConf.iloc[-1],
y=yyy+5,
text=" "*1 + str(yyy.astype('int')),
font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=df_regres_daily1.DateRepConf.iloc[-1],
y=yyy+5,
text=" "*8 + i,
font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
for i in topreg[7:8]:
yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=df_regres_daily1.DateRepConf.iloc[-1],
y=yyy,
text=" "*1 + str(yyy.astype('int')),
font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=df_regres_daily1.DateRepConf.iloc[-1],
y=yyy,
text=" "*8 + i,
font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
for i in topreg[8:9]:
yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=df_regres_daily1.DateRepConf.iloc[-1],
y=yyy,
text=" "*1 + str(yyy.astype('int')),
font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=df_regres_daily1.DateRepConf.iloc[-1],
y=yyy,
text=" "*8 + i,
font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
for i in topreg[9:10]:
yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=df_regres_daily1.DateRepConf.iloc[-1],
y=yyy,
text=" "*1 + str(yyy.astype('int')),
font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='left', yanchor='middle',
ax=0, ay=0,
x=df_regres_daily1.DateRepConf.iloc[-1],
y=yyy,
text=" "*8 + i,
font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
# fig1.show()
fig1 = px.line(df_regres_daily1, x='DateRepConf', y='Cum_count', log_y=True, line_shape='spline',
color='ProvRes', labels={'Cum_count': 'log(Cumulative Count)'},
title="Cumulative Case Count on Location with Most Cases (Log Scale)")
fig1.update_layout(showlegend=True)
fig1.show()
df_ncr = df_regres1[df_regres1['ProvRes'] == 'METRO MANILA']
topcity = df_ncr.CityMunRes.tolist()
df_city_daily = covid_ph.groupby(['CityMunRes', 'DateRepConf']).count()['CaseCode'].sort_values(ascending=False).reset_index()
df_city_daily.sort_values(by=['CityMunRes','DateRepConf'], inplace=True)
df_city_daily1 = pd.DataFrame()
for i in topcity:
a = df_city_daily[df_city_daily.CityMunRes == i].rename({'CaseCode': 'Count'}, axis=1)
base = pd.DataFrame(covid_ph.DateRepConf.sort_values().unique())
base.rename({0: 'DateRepConf'}, axis=1, inplace=True)
# base = base[base.DateRepConf >= '2020-03-06']
q = pd.merge(base, a, how='left', on='DateRepConf')
q['CityMunRes'] = i
q['Count'] = q.Count.fillna(0)
q['Cum_count'] = q.Count.cumsum()
# q['Percent_change'] = q.Cum_count.pct_change()*100
df_city_daily1 = df_city_daily1.append(q)
df_city_daily1
# Figure 1
fig0 = px.line(df_city_daily1, x='DateRepConf', y='Cum_count',
color='CityMunRes', labels={'Cum_count': 'Cumulative Count'},
title="Cumulative Case Count in NCR Cities")
fig0.update_layout(showlegend=True)
fig0.show()
# Figure 2
fig1 = px.line(df_city_daily1, x='DateRepConf', y='Cum_count', log_y=True,
color='CityMunRes', labels={'Cum_count': 'log(Cumulative Count)'},
title="Cumulative Case Count in NCR Cities (Log Scale)")
fig1.update_layout(showlegend=True)
fig1.show()
# a = pd.DataFrame(topcity).rename({0: 'RegRes'}, axis=1)
# for i in topcity:
# yyy = df_city_daily1[(df_city_daily1.CityMunRes == i) & (df_city_daily1.DateRepConf == pd.to_datetime(df_city_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
# fig0.add_annotation(dict(xref="x", yref="y", showarrow=False,
# arrowhead=0, xanchor='left', yanchor='middle',
# ax=0, ay=0,
# x=df_city_daily1.DateRepConf.iloc[-1],
# y=yyy,
# text=" "*1 + str(yyy.astype('int')),
# font_color=px.colors.qualitative.Plotly[topcity.index(i)]))
# fig0.add_annotation(dict(xref="x", yref="y", showarrow=False,
# arrowhead=0, xanchor='left', yanchor='middle',
# ax=0, ay=0,
# x=df_city_daily1.DateRepConf.iloc[-1],
# y=yyy,
# text=" "*12 + i,
# font_color=px.colors.qualitative.Plotly[topcity.index(i)]))
Note: Trendline (in blue) is based on 7-day Moving Average.
# for i in topreg:
# df = df_regres_daily1[df_regres_daily1.ProvRes == i].reset_index(drop=True)
# cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
# df['Trend_HP'] = trend1
# df['Trend_MA7'] = df['Count'].rolling(7).mean()
# df['Trend_MA3'] = df['Count'].rolling(3).mean()
# fig1 = px.bar(df[df.ProvRes == i], x='DateRepConf', y='Count', color_discrete_sequence=['lightblue'])
# fig1_trend = px.line(df[df.ProvRes == i], x='DateRepConf', y='Trend_MA7', color_discrete_sequence=["blue"])
# fig1.add_trace(fig1_trend.data[0])
# # fig1_trend1 = px.line(df[df.ProvRes == i], x='DateRepConf', y='Trend_MA3', color_discrete_sequence=["green"])
# # fig1.add_trace(fig1_trend1.data[0])
# fig1.update_yaxes(title="Case Count", tickfont_size=10)
# fig1.update_layout(title=dict(x=0.5, text="Daily New Cases in " + f"<b>{i}</b>"), height=400,
# xaxis=dict(tickmode='linear', dtick=0, tickfont_size=9, title=None),
# legend=dict(orientation="v", title="",
# x=0.05, y=0.8, yanchor="bottom"),
# hoverlabel=dict(font_size=15))
# fig1.show()
# zzz = df_regres.copy()
# zzz['ProvRes1'] = zzz['ProvRes'] + ": " + zzz['Count'].astype(str)
# topreg1 = zzz.head(10).ProvRes1.tolist()
# topreg1
fig000 = make_subplots(rows=5, cols=2, subplot_titles=topreg)
for i in topreg[::2]: # EVEN NUMBER
df = df_regres_daily1[df_regres_daily1.ProvRes == i].reset_index(drop=True)
cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
df['Trend_HP'] = trend1
df['Trend_MA7'] = df['Count'].rolling(7).mean()
df['Trend_MA3'] = df['Count'].rolling(3).mean()
df = df[df.DateRepConf >= '2020-03-01']
fig1 = px.bar(df[df.ProvRes == i], x='DateRepConf', y='Count',
labels={'DateRepConf': 'Date'},
color_discrete_sequence=['lightblue'])
fig1_trend = px.line(df[df.ProvRes == i], x='DateRepConf', y='Trend_MA7', line_shape='spline', color_discrete_sequence=["blue"])
fig000.add_trace(fig1.data[0], row=topreg[::2].index(i)+1, col=1)
fig000.add_trace(fig1_trend.data[0], row=topreg[::2].index(i)+1, col=1)
for i in topreg[1::2]: # EVEN NUMBER
df = df_regres_daily1[df_regres_daily1.ProvRes == i].reset_index(drop=True)
cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
df['Trend_HP'] = trend1
df['Trend_MA7'] = df['Count'].rolling(7).mean()
df['Trend_MA3'] = df['Count'].rolling(3).mean()
df = df[df.DateRepConf >= '2020-03-01']
fig1 = px.bar(df[df.ProvRes == i], x='DateRepConf', y='Count',
labels={'DateRepConf': 'Date'},
color_discrete_sequence=['lightblue'])
fig1_trend = px.line(df[df.ProvRes == i], x='DateRepConf', y='Trend_MA7', line_shape='spline', color_discrete_sequence=["blue"])
fig000.add_trace(fig1.data[0], row=topreg[1::2].index(i)+1, col=2)
fig000.add_trace(fig1_trend.data[0], row=topreg[1::2].index(i)+1, col=2)
fig000.update_xaxes(tickfont_size=10, title="Date", titlefont_size=10)
fig000.update_yaxes(tickfont_size=10, title="Case Count", titlefont_size=10)
fig000.update_layout(title=dict(x=0.5, text="COVID-19 DAILY CASES IN PROVINCES"), showlegend=False, height=2000,
legend=dict(orientation="h", x=-0.05, y=-0.4, yanchor="bottom"),
hoverlabel=dict(font_size=18))
fig000.show()
py.plot(fig000, filename='daily_cases_Provinces.html', auto_open=False)
# prov = ['TARLAC PROVINCE']
# fig000 = make_subplots(rows=5, cols=2, subplot_titles=prov)
# for i in prov: # EVEN NUMBER
# df = df_regres_daily[df_regres_daily.ProvRes == i].reset_index(drop=True)
# df = df.rename({'CaseCode': 'Count'}, axis=1)
# cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
# df['Trend_HP'] = trend1
# df['Trend_MA7'] = df['Count'].rolling(7).mean()
# df['Trend_MA3'] = df['Count'].rolling(3).mean()
# df = df[df.DateRepConf >= '2020-03-01']
# fig1 = px.bar(df[df.ProvRes == i], x='DateRepConf', y='Count',
# labels={'DateRepConf': 'Date'},
# color_discrete_sequence=['lightblue'])
# fig1_trend = px.line(df[df.ProvRes == i], x='DateRepConf', y='Trend_HP', line_shape='spline', color_discrete_sequence=["blue"])
# fig000.add_trace(fig1.data[0], row=prov[::2].index(i)+1, col=1)
# fig000.add_trace(fig1_trend.data[0], row=prov[::2].index(i)+1, col=1)
# fig000.update_xaxes(tickfont_size=10, title="Date", titlefont_size=10)
# fig000.update_yaxes(tickfont_size=10, title="Case Count", titlefont_size=10)
# fig000.update_layout(title=dict(x=0.5), showlegend=False, height=2000,
# legend=dict(orientation="h", x=-0.05, y=-0.4, yanchor="bottom"),
# hoverlabel=dict(font_size=18))
# fig000.show()
Note: Trendline (in blue) is based on 7-day Moving Average.
# for i in topcity:
# df = df_city_daily1[df_city_daily1.CityMunRes == i].reset_index(drop=True)
# # cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
# # df['Trend_HP'] = trend1
# df['Trend_MA7'] = df['Count'].rolling(7).mean()
# df['Trend_MA3'] = df['Count'].rolling(3).mean()
# fig1 = px.bar(df[df.CityMunRes == i], x='DateRepConf', y='Count', color_discrete_sequence=["lightblue"])
# fig1_trend = px.line(df[df.CityMunRes == i], x='DateRepConf', y='Trend_MA7',
# color_discrete_sequence=["blue"])
# fig1.add_trace(fig1_trend.data[0])
# fig1.update_yaxes(title="Case Count", tickfont_size=10)
# fig1.update_layout(title=dict(x=0.5, text="Daily New Cases in " + f"<b>{i}</b>"), height=400,
# xaxis=dict(tickmode='linear', dtick=0, tickfont_size=9, title=None),
# legend=dict(orientation="v", title="",
# x=0.05, y=0.8, yanchor="bottom"),
# hoverlabel=dict(font_size=15), hovermode="x")
# fig1.show()
fig000 = make_subplots(rows=9, cols=2, subplot_titles=topcity)
for i in topcity[::2]: # ODD NUMBERS
df = df_city_daily1[df_city_daily1.CityMunRes == i].reset_index(drop=True)
cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
df['Trend_HP'] = trend1
df['Trend_MA7'] = df['Count'].rolling(7).mean()
df['Trend_MA3'] = df['Count'].rolling(3).mean()
df = df[df.DateRepConf >= '2020-03-01']
fig1 = px.bar(df[df.CityMunRes == i], x='DateRepConf', y='Count',
labels={'DateRepConf': 'Date'},
color_discrete_sequence=['lightblue'])
fig1_trend = px.line(df[df.CityMunRes == i], x='DateRepConf', y='Trend_MA7', line_shape='spline',
color_discrete_sequence=["blue"])
fig000.add_trace(fig1.data[0], row=topcity[::2].index(i)+1, col=1)
fig000.add_trace(fig1_trend.data[0], row=topcity[::2].index(i)+1, col=1)
for i in topcity[1::2]: # EVEN NUMBERS
df = df_city_daily1[df_city_daily1.CityMunRes == i].reset_index(drop=True)
cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
df['Trend_HP'] = trend1
df['Trend_MA7'] = df['Count'].rolling(7).mean()
df['Trend_MA3'] = df['Count'].rolling(3).mean()
df = df[df.DateRepConf >= '2020-03-01']
fig1 = px.bar(df[df.CityMunRes == i], x='DateRepConf', y='Count',
labels={'DateRepConf': 'Date'},
color_discrete_sequence=['lightblue'])
fig1_trend = px.line(df[df.CityMunRes == i], x='DateRepConf', y='Trend_MA7', line_shape='spline',
color_discrete_sequence=["blue"])
fig000.add_trace(fig1.data[0], row=topcity[1::2].index(i)+1, col=2)
fig000.add_trace(fig1_trend.data[0], row=topcity[1::2].index(i)+1, col=2)
fig000.update_xaxes(tickfont_size=10, title="Date", titlefont_size=10)
fig000.update_yaxes(tickfont_size=10, title="Case Count", titlefont_size=10)
fig000.update_layout(title=dict(x=0.5, text="COVID-19 DAILY CASES in NCR"), showlegend=False, height=3200,
legend=dict(orientation="h", x=-0.05, y=-0.4, yanchor="bottom"),
hoverlabel=dict(font_size=18))
fig000.show()
py.plot(fig000, filename='daily_cases_NCR.html', auto_open=False)
df_tests_tot = df_tests.groupby('report_date')[['cumulative_unique_individuals',
'cumulative_positive_individuals', 'cumulative_negative_individuals',
'cumulative_samples_tested', 'remaining_available_tests', 'backlogs']].sum()
# df_tests_tot['invalid'] = df_tests_tot['cumulative_unique_individuals'] - (df_tests_tot['cumulative_positive_individuals'] + df_tests_tot['cumulative_negative_individuals'])
# df_tests_tot['%Invalid'] = 100 - (df_tests_tot['%Positive'] + df_tests_tot['%Negative'])
df_tests_tot['%Positive'] = 100*(df_tests_tot['cumulative_positive_individuals'] / df_tests_tot['cumulative_unique_individuals'])
df_tests_tot['%Negative'] = 100*(df_tests_tot['cumulative_negative_individuals'] / df_tests_tot['cumulative_unique_individuals'])
df_tests_tot['Total_Tests_Available'] = df_tests_tot['cumulative_samples_tested'] + df_tests_tot['remaining_available_tests']
df_tests_tot["backlogs"] = df_tests_tot["backlogs"].replace(0.0, '')
df_tests_tot = df_tests_tot.reset_index()
data = pd.melt(df_tests_tot[-1:], id_vars=['report_date'],
value_vars=['cumulative_samples_tested', 'remaining_available_tests'],
var_name='Test_Ratio', value_name = 'Count')
data['Test_Ratio'] = data['Test_Ratio'].replace({'remaining_available_tests': 'Remaining Available Tests Supplies',
'cumulative_samples_tested': 'Samples Tested'})
data['Percent'] = (data.Count / data.Count.sum())*100
data['Pct'] = round(data['Percent'],1).astype('str') + '%'
data
q = 100*round(df_tests_tot[-1:].iloc[:,1].values[0] / df_tests_tot[-1:].iloc[:,4].values[0],3)
fig = px.bar(data, x='Percent', orientation='h', color='Test_Ratio',
text='Count', hover_name='Pct', height=250,
# color_discrete_sequence=['#66c2a5', '#84dbc0'])
color_discrete_sequence=['#64B0A2', '#C0D4CD'])
fig.update_layout(legend=dict(orientation='h', title=""), title="Availability of Test Kits")
fig.update_layout(showlegend=False, xaxis=dict(title=None), hoverlabel=dict(font_size=20))
fig.for_each_trace(lambda t: t.update(textfont_size=16))
for i in range(0, len(data.Percent.unique())):
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=data.Percent.cumsum().unique()[i]-0.001,
y=-0.6,
text=data.Test_Ratio[i]), font_size=16,
font_color = ['#64B0A2', '#C0D4CD'][i])
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=data.Percent.cumsum().unique()[i]-0.001,
y=0.6,
text=data.Pct[i]), font_size=16,
font_color = ['#64B0A2', '#C0D4CD'][i])
print("As of", df_tests.report_date.max().date().strftime("%b %d, %Y"))
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()
# Figure 0
data = pd.melt(df_tests_tot[-1:], id_vars=['report_date'],
value_vars=['cumulative_negative_individuals', 'cumulative_positive_individuals'],
var_name='Test_Ratio', value_name = 'Count')
data['Test_Ratio'] = data['Test_Ratio'].replace({'cumulative_negative_individuals': 'Negative Individuals',
'cumulative_positive_individuals': 'Positive Individuals'})
data['Percent'] = (data.Count / data.Count.sum())*100
data['Pct'] = round(data['Percent'],1).astype('str') + '%'
data
q = round(df_tests_tot[-1:].iloc[:,1].values[0] / df_tests_tot[-1:].iloc[:,4].values[0]*100,1)
fig = px.bar(data, x='Percent', orientation='h', color='Test_Ratio',
text='Count', hover_name='Pct', height=250)
fig.update_layout(legend=dict(orientation='h', title="", font_size=16), showlegend=False,
xaxis=dict(title=None), hoverlabel=dict(font_size=20),
title=("Unique Individuals Tested: " + f"<b>{df_tests_tot[-1:].iloc[:,1].values[0]}</b>" +
" (" + q.astype(str) + "% of Total Samples Tested)"))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=16))
for i in range(0, len(data.Percent.unique())):
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=data.Percent.cumsum().unique()[i]-0.001, y=0.6,
text=data.Pct[i]), font_size=16,
font_color = px.colors.qualitative.Plotly[i])
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=data.Percent.cumsum().unique()[i]-0.001, y=-0.6,
text=['Negative', 'Positive'][i]), font_size=14,
font_color = px.colors.qualitative.Plotly[i])
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()
print("Note: There are more positive individuals tested vs publicly reported because of undergoing case validation and processing.")
df = pd.melt(df_tests_tot, id_vars=['report_date'],
# value_vars=['%Negative', '%Positive'],
value_vars=['%Positive', '%Negative'],
var_name = 'Ratio', value_name = 'Percent')
fig = px.bar(df, x='report_date', y='Percent', color='Ratio',
color_discrete_sequence=list(reversed(px.colors.qualitative.Plotly[0:2])))
fig.update_layout(title=dict(x=0.5, text="Historical Testing Ratio on Unique Individuals"),
xaxis=dict(tickmode='linear', #tick0=4,
dtick=0, tickfont_size=12, title="Date"),
yaxis=dict(tickfont_size=10),
legend=dict(orientation="h", title="",
x=0, y=0.97,
yanchor="bottom"))
fig.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=True, showgrid=True, zeroline=False, showline=False)
fig.update_layout(hoverlabel=dict(font_size=20), hovermode='x')
fig.show()
print("Note: Testing ratio here is computed based on the total samples tested as of that date")
df_tests_tot1 = df_tests.groupby(['report_date', 'facility_name'])[['cumulative_unique_individuals',
'cumulative_positive_individuals', 'cumulative_negative_individuals',
'cumulative_samples_tested', 'remaining_available_tests', 'backlogs']].sum()
df_tests_tot1['%Positive'] = 100*(df_tests_tot1['cumulative_positive_individuals'] / df_tests_tot1['cumulative_unique_individuals'])
df_tests_tot1['%Negative'] = 100*(df_tests_tot1['cumulative_negative_individuals'] / df_tests_tot1['cumulative_unique_individuals'])
df_tests_tot1['Total_Tests_Available'] = df_tests_tot1['cumulative_samples_tested'] + df_tests_tot1['remaining_available_tests']
df_tests_tot1 = df_tests_tot1.reset_index()
df_test_fac = df_tests_tot1[df_tests_tot1.report_date == df_tests_tot1.report_date.max()].sort_values('cumulative_samples_tested', ascending=False).reset_index(drop=True)
df_test_fac['%Total'] = round(df_test_fac['cumulative_unique_individuals'] / df_test_fac['cumulative_unique_individuals'].sum() * 100, 1)
df_test_fac['Facility'] = 'Testing Facility'
fig = px.treemap(df_test_fac,
path=['Facility', 'facility_name'],
values='cumulative_unique_individuals',
hover_name='%Total',
title='Number of Samples Tested by Facility',
branchvalues='total', height=700)
fig.update_layout(hoverlabel=dict(font_size=18))
fig.show()
# df_tests_tot['NEW UNIQUE TESTS'] = df_tests_tot[df_tests_tot.columns[1]].diff()
# df_tests_tot['NEW POSITIVE'] = df_tests_tot['cumulative_positive_individuals'].diff()
df_tests_tot['NEW UNIQUE TESTS'] = df_tests.groupby('report_date').daily_output_samples_tested.sum().values
df_tests_tot['NEW POSITIVE'] = df_tests.groupby('report_date').daily_output_positive_individuals.sum().values
df_tests_tot['NEW %POSITIVE'] = (df_tests_tot['NEW POSITIVE']/df_tests_tot['NEW UNIQUE TESTS'])*100
df_tests_tot['TREND'] = df_tests_tot['NEW %POSITIVE'].rolling(7).mean()
df_tests_tot["type1"] = 'new unique tests'
df_tests_tot["type2"] = 'backlogs^'
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig1 = px.bar(df_tests_tot, x='report_date', y='NEW UNIQUE TESTS', color='type1',
color_discrete_sequence=["#00CC96"])
fig2 = px.line(df_tests_tot, x='report_date', y='%Positive',
color_discrete_sequence=["red"])
fig2.data[0].update(mode='markers+lines')
fig3 = px.bar(df_tests_tot, x='report_date', y='backlogs', color='type2',
color_discrete_sequence=["blue"])
# fig3.data[0].update(mode='markers+lines')
fig.add_trace(fig1.data[0], secondary_y=False)
fig.add_trace(fig2.data[0], secondary_y=True)
fig.add_trace(fig3.data[0], secondary_y=False)
fig.update_yaxes(title="%Positive", tickfont_size=10, color='red',
range=[0, df_tests_tot['%Positive'].max()*1.1],
showgrid=False, secondary_y=True)
fig.update_yaxes(title='No. of Conducted Test', color='#00CC96',
#range=[0, df_tests_tot['UNIQUE INDIVIDUALS TESTED'].max()*1.5],
showgrid=True, tickfont_size=12, secondary_y=False)
fig.update_layout(title=dict(x=0.5, text="%Positive* on Daily Conducted Tests"),
xaxis=dict(tickmode='linear', #tick0=4,
dtick=0, tickfont_size=10, title="Date"),
yaxis=dict(dtick=1000), hoverlabel=dict(font_size=20),
legend=dict(orientation="v", title="",
x=0.03, y=0.4, yanchor="bottom"))
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
# xanchor='left', yanchor='middle', ax=0, ay=0,
# x=pd.to_datetime(df_tests_tot.Date.unique().max()),
# y=df_tests_tot['%POSITIVE'][-1:].values[0],
# text=" "*2 + round(df_tests_tot['%POSITIVE'][-1:].values[0],1).astype('str')+'%',
# font_size=14, font_color=px.colors.qualitative.Plotly[1]))
fig.show()
print('*Computed based on the ratio of cumulative positive cases over cumulative conducted tests.')
print('^These are number of specimens submitted in the laboratory with no results released. Backlog data only started on May 19.')
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig1 = px.bar(df_tests_tot, x='report_date',
# y='UNIQUE INDIVIDUALS TESTED',
y='NEW UNIQUE TESTS',
color_discrete_sequence=["#00CC96"])
fig2 = px.line(df_tests_tot, x='report_date',
y='NEW %POSITIVE',
color_discrete_sequence=["red"])
fig2.data[0].update(mode='markers+lines')
df_tests_tot['TREND_NAME'] = '%Positive Trend (7-day MA)'
fig_trend = px.line(df_tests_tot, x='report_date',
y='TREND', color='TREND_NAME',
color_discrete_sequence=["red"])
fig.add_trace(fig1.data[0], secondary_y=False)
fig.add_trace(fig2.data[0], secondary_y=True)
fig.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}), secondary_y=True)
fig.update_yaxes(title="%Positive", tickfont_size=10, color='red',
range=[0, df_tests_tot['%Positive'].max()*1.1],
showgrid=False, secondary_y=True)
fig.update_yaxes(title='No. of Conducted Test', color='#00CC96',
#range=[0, df_tests_tot['UNIQUE INDIVIDUALS TESTED'].max()*1.5],
showgrid=True, tickfont_size=10, secondary_y=False)
fig.update_layout(title=dict(x=0.5, text="Estimated %Positive** on Daily Tests"),
xaxis=dict(tickmode='linear', #tick0=4,
dtick=0, tickfont_size=10, title="Date"),
yaxis=dict(dtick=1000), showlegend=True,
legend=dict(orientation="h", title="",
x=0.6, y=1, yanchor="bottom"))
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
# xanchor='left', yanchor='middle', ax=0, ay=0,
# x=pd.to_datetime(df_tests_tot.Date.unique().max()),
# y=df_tests_tot['%POSITIVE'][-1:].values[0],
# text=" "*2 + round(df_tests_tot['%POSITIVE'][-1:].values[0],1).astype('str')+'%',
# font_size=14, font_color=px.colors.qualitative.Plotly[1]))
fig.show()
print("**Computed based on the ratio of new positive cases reported over new conducted tests on that day.")
df_recovered = covid_ph.loc[(covid_ph.RemovalType=='Recovered')].reset_index(drop=True)
df_died = covid_ph.loc[(covid_ph.RemovalType=='Died')].reset_index(drop=True)
df_recovered['Days'] = (df_recovered.DateRecover - df_recovered.DateRepConf).dt.days
df_died['Days'] = (df_died.DateDied - df_died.DateRepConf).dt.days
df = pd.concat([df_recovered, df_died])
fig = px.histogram(df, x='Days', color='RemovalType', opacity=0.8,
marginal='box', barmode='overlay',
labels={"RemovalType": "PatientType", "Days": "Days After Confirmation"},
title='Days of Recovery / Death after the Confirmation')
fig.update_layout(shapes=[dict(type= 'line',
yref= 'paper', y0= 0, y1= 1,
xref= 'x', x0= 0, x1= 0)],
legend=dict(orientation="h", title="",
x=0.0, y=0.65, yanchor="bottom"))
fig.add_annotation(dict(xref="x", yref="paper", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=df_recovered.Days.median()+2, y=0.87,
text=str(int(df_recovered.Days.median()))+' Days', font_size=11,
font_color = px.colors.qualitative.Plotly[0]))
fig.add_annotation(dict(xref="x", yref="paper", showarrow=False,
arrowhead=0, xanchor='right', yanchor='middle',
ax=0, ay=0,
x=df_died.Days.median()+2, y=1,
text=str(int(df_died.Days.median()))+' Days', font_size=11,
font_color = px.colors.qualitative.Plotly[1]))
fig.show()
Most of these death cases probably needs to be validated if caused by Covid-19 before publicly reported.
f = pd.DataFrame()
for i in range(0, 17):
ff = pd.read_csv("./data/nhfr/rfacilities2 (" + str(i) +").csv")
ff.rename(columns={ff.columns[0]: "HealthFacilityCode"}, inplace = True)
f = f.append(ff)
# f.to_excel("./data/ph_list_of_facilities.xlsx")
df_hosp1 = df_hosp.sort_values(by=['hfhudcode', 'reportdate'], ascending=[True, False]).reset_index(drop=True)
df_hosp2 = df_hosp1.groupby('hfhudcode').first().reset_index()
df_hosp2
df_hosp3 = pd.merge(df_hosp2, f.iloc[:, :15], how='left', left_on='hfhudcode', right_on='HealthFacilityCode')
# Categorizing Birthing Home as Infirmary
df_hosp3['Health Facility Type'] = df_hosp3['Health Facility Type'].replace({'Birthing Home': 'Infirmary'})
data2 = df_hosp3.groupby(['Ownership Major Classification', 'Health Facility Type'])['id'].count().reset_index()
data2.columns = ['OwnershipType', 'FacilityType', 'Count']
data2['Percent'] = data2['Count']/data2['Count'].sum()*100
data2['Pct'] = round(data2['Percent'], 1).astype(str)+'%'
data2['Cum_percent'] = data2.Percent.cumsum()
data2['OFType'] = data2.OwnershipType + " " + data2.FacilityType
print("Hospital Data as of", df_hosp.reportdate.max())
kulay = ['rgb(27,158,119)', 'rgb(102,194,165)', 'rgb(217,95,2)', 'rgb(252,141,98)']
fig = px.bar(data2, x='Percent', orientation='h', color='OFType',
text='Count', hover_name='Pct', height=250,
labels={'Percent': " ", 'index':'HospitalType'},
color_discrete_sequence=kulay)
fig.update_layout(legend=dict(orientation='h', title="", font_size=15.5),
title=("Total Number of HealthCare Facilities: " + f"<b>{data2.Count.sum()}</b>" + "*"),
hovermode=False)
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=20))
# for i in range(0, len(data2.Percent.unique())):
# fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
# arrowhead=0, xanchor='right', yanchor='middle',
# ax=0, ay=0,
# x=data2.Cum_percent.unique()[i]-0.001, y=0.5,
# text=data2.Pct[i]), font_size=12, font_color = kulay[i])
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()
z = str(round(df_hosp3[df_hosp3['HealthFacilityCode'].isnull()].shape[0], 0))
z1 = str(round(df_hosp3[df_hosp3['HealthFacilityCode'].isnull()].shape[0]/df_hosp3.shape[0]*100,1))+'%'
print("Note:")
print("* Only the facilities who reported on the DataCollect App. Also, some hospitals & infirmaries don't have updated counts.")
print("* There are", z, "(", z1, ")", "facilities with data but no facility tagging from The National Health Facility Registry.")
data003 = pd.DataFrame()
for i in ['icu', 'isolbed', 'beds_ward', 'mechvent']:
data3 = df_hosp3.groupby(['Ownership Major Classification',
'Health Facility Type'])[[i+'_v',
i+'_o']].sum().reset_index().rename({'Ownership Major Classification': 'OwnershipType',
'Health Facility Type': 'FacilityType'}, axis=1)
data03 = pd.melt(data3, id_vars=['OwnershipType', 'FacilityType'],
value_vars=[i+'_v', i+'_o'], var_name=['Availability'],
value_name='Count')
data03['Availability'] = data03['Availability'].replace({i+'_v': 'vacant', i+'_o': 'occupied'})
data03['Type'] = i
data003 = data003.append(data03)
data003['Type'] = data003['Type'].replace({'icu': 'ICU Beds', 'isolbed': 'Isolation Beds',
'beds_ward': 'Ward Beds', 'mechvent': 'Mechanical Ventilators'})
data003 = data003.reset_index(drop=True)
data003['Percent'] = data003.groupby(['Type','OwnershipType','FacilityType'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data003['Pct'] = round(data003['Percent'],1).astype('str') + '%'
data003['Type'] = data003['Type'].replace({'Mechanical Ventilators': 'Mech Vents'})
data003
##############################################################################
# TOTAL
data100 = data003.groupby(['Type', 'Availability'])['Count'].sum().reset_index()
data100['Percent'] = data100.groupby(['Type'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data100['Pct'] = round(data100['Percent'],1).astype('str') + '%'
x = data100[data100.Type.isin(['ICU Beds', 'Isolation Beds', 'Ward Beds'])]
y = data100[data100.Type.isin(['Mech Vents'])]
data100 = pd.concat([x, y])
data100 = data100.sort_values(by='Availability', ascending=False)
fig = px.bar(data100, x='Type', y='Percent', orientation='v', color='Availability',
text='Pct', hover_name='Count')
fig.update_layout(legend=dict(orientation='h', x=0.01, y=-0.1, title="", font_size=16),
hoverlabel=dict(font_size=20),
title=("Availability of Beds & Mechanical Ventilators in <b>Total PH Facilities</b>"))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=18))
fig.update_xaxes(title=None, showticklabels=True, showgrid=False, zeroline=False, showline=False, side='bottom', tickfont_size=16)
fig.update_yaxes(title=None, showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_layout(shapes=[dict(type= 'line', yref= 'paper', y0= -0.05, y1= 0.95, xref= 'x', x0= 2.5, x1= 2.5,
line=dict(color="gray", width=1, dash="dash"))])
for i in range(0, len(data003.Type.unique())):
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='center', yanchor='middle',
ax=0, ay=0,
x=i,
y=-5,
text="Count: " + str(data003.groupby('Type').sum().Count[i])), font_size=12, font_color = "gray")
fig.show()
print("Note: These are beds dedicted exclusively for COVID-19 cases. Please interpret with caution, accuracy of data is not assured.")
# GOVERNMENT - HOSPITAL
data0011 = data003[(data003['OwnershipType'] == 'Government') & (data003['FacilityType'] == 'Hospital')].reset_index(drop=True)
fig3 = px.bar(data0011, x='Type', y='Percent', orientation='v', color='Availability',
text='Pct', hover_name='Count', hover_data=None)
fig3.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""),
hoverlabel=dict(font_size=20),
title=("Availability of Beds & Mechanical Vents in <b>Government Facilities</b>"))
fig3.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig3.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig3.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig3.show()
# GOVERNMENT - INFIRMARIES
data0011 = data003[(data003['OwnershipType'] == 'Government') & (data003['FacilityType'] == 'Infirmary')].reset_index(drop=True)
fig4 = px.bar(data0011, x='Type', y='Percent', orientation='v', color='Availability',
text='Pct', hover_name='Count', hover_data=None)
fig4.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""),
hoverlabel=dict(font_size=20),
title=("Availability of Beds & Mechanical Vents in <b>Private Facilities</b>"))
fig4.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig4.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig4.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig4.show()
# PRIVATE - HOSPITAL
data0022 = data003[(data003['OwnershipType'] == 'Private') & (data003['FacilityType'] == 'Hospital')].reset_index(drop=True)
fig5 = px.bar(data0022, x='Type', y='Percent', orientation='v', color='Availability',
text='Pct', hover_name='Count', hover_data=None)
fig5.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""),
hoverlabel=dict(font_size=20),
title=("Availability of Beds & Mechanical Vents in <b>Government Facilities</b>"))
fig5.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig5.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig5.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig5.show()
# PRIVATE -INFIRMARIES
data0022 = data003[(data003['OwnershipType'] == 'Private') & (data003['FacilityType'] == 'Infirmary')].reset_index(drop=True)
fig6 = px.bar(data0022, x='Type', y='Percent', orientation='v', color='Availability',
text='Pct', hover_name='Count', hover_data=None)
fig6.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""),
hoverlabel=dict(font_size=20),
title=("Availability of Beds & Mechanical Vents in <b>Private Facilities</b>"))
fig6.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig6.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig6.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig6.show()
# SUBPLOTS
fig000 = make_subplots(rows=2, cols=2, subplot_titles=("Government Hospitals", "Government Infirmaries",
"Private Hospitals", "Private Infirmaries"))
for loop in range(0, 2):
fig000.add_trace(fig3.data[loop], row=1, col=1)
fig000.add_trace(fig4.data[loop], row=1, col=2)
fig000.add_trace(fig5.data[loop], row=2, col=1)
fig000.add_trace(fig6.data[loop], row=2, col=2)
fig000.update_layout(barmode='stack',
xaxis1=dict(tickmode='linear', dtick=0, tickfont_size=12),
xaxis2=dict(tickmode='linear', dtick=0, tickfont_size=12))
fig000.update_xaxes(title=None, side='bottom', tickmode='array', ticktext=['a', 'b', 'c', 'd'])
fig000.update_yaxes(title=None, showticklabels=False, showgrid=False,
zeroline=False, showline=False)
fig000.update_layout(title=dict(x=0.5), showlegend=False, height=800,
legend=dict(orientation="h", x=-0.05, y=-0.4,
yanchor="bottom"), hoverlabel=dict(font_size=20),
xaxis_tickformat='<br>')
for i in range(0, len(data003.Type.unique())):
fig000.add_annotation(dict(xref="x1", yref="y1", showarrow=False,
arrowhead=0, xanchor='center', yanchor='middle',
ax=0, ay=0, x=i, y=-5,
text="Count: " + str(int(fig3.data[0]['hovertext'][i]+fig3.data[1]['hovertext'][i])),
font_size=11, font_color = "gray"))
fig000.add_annotation(dict(xref="x2", yref="y2", showarrow=False,
arrowhead=0, xanchor='center', yanchor='middle',
ax=0, ay=0, x=i, y=-5,
text="Count: " + str(int(fig4.data[0]['hovertext'][i]+fig4.data[1]['hovertext'][i])),
font_size=11, font_color = "gray"))
fig000.add_annotation(dict(xref="x3", yref="y3", showarrow=False,
arrowhead=0, xanchor='center', yanchor='middle',
ax=0, ay=0, x=i, y=-5,
text="Count: " + str(int(fig5.data[0]['hovertext'][i]+fig5.data[1]['hovertext'][i])),
font_size=11, font_color = "gray"))
fig000.add_annotation(dict(xref="x4", yref="y4", showarrow=False,
arrowhead=0, xanchor='center', yanchor='middle',
ax=0, ay=0, x=i, y=-5,
text="Count: " + str(int(fig6.data[0]['hovertext'][i]+fig6.data[1]['hovertext'][i])),
font_size=11, font_color = "gray"))
fig000.show()
print("Note: The graphs above only shows utilization in facilities that have submitted to the DOHDataCollect App only.")
df_hosp3_ncr = df_hosp3[df_hosp3['Region Name'] == 'NATIONAL CAPITAL REGION (NCR)'].reset_index(drop=True)
data0033 = pd.DataFrame()
for i in ['icu', 'isolbed', 'beds_ward', 'mechvent']:
data33 = df_hosp3_ncr.groupby(['Ownership Major Classification',
'Health Facility Type'])[[i+'_v',
i+'_o']].sum().reset_index().rename({'Ownership Major Classification': 'OwnershipType',
'Health Facility Type': 'FacilityType'}, axis=1)
data033 = pd.melt(data33, id_vars=['OwnershipType', 'FacilityType'],
value_vars=[i+'_v', i+'_o'], var_name=['Availability'],
value_name='Count')
data033['Availability'] = data033['Availability'].replace({i+'_v': 'vacant', i+'_o': 'occupied'})
data033['Type'] = i
data0033 = data0033.append(data033)
data0033['Type'] = data0033['Type'].replace({'icu': 'ICU Beds', 'isolbed': 'Isolation Beds',
'beds_ward': 'Ward Beds', 'mechvent': 'Mechanical Ventilators'})
data0033 = data0033.reset_index(drop=True)
data0033['Percent'] = data0033.groupby(['Type','OwnershipType','FacilityType'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data0033['Pct'] = round(data0033['Percent'],1).astype('str') + '%'
data0033['Type'] = data003['Type'].replace({'Mechanical Ventilators': 'Mech Vents'})
data0033
##############################################################################
# TOTAL
data100 = data0033.groupby(['Type', 'Availability'])['Count'].sum().reset_index()
data100['Percent'] = data100.groupby(['Type'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data100['Pct'] = round(data100['Percent'],1).astype('str') + '%'
x = data100[data100.Type.isin(['ICU Beds', 'Isolation Beds', 'Ward Beds'])]
y = data100[data100.Type.isin(['Mech Vents'])]
data100 = pd.concat([x, y])
data100 = data100.sort_values(by='Availability', ascending=False)
fig = px.bar(data100, x='Type', y='Percent', orientation='v', color='Availability',
text='Pct', hover_name='Count')
fig.update_layout(legend=dict(orientation='h', x=0.01, y=-0.1, title="", font_size=16),
hoverlabel=dict(font_size=20),
title=("Availability of Beds & Mechanical Ventilators in <b>NCR Facilities</b>"))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=18))
fig.update_xaxes(title=None, showticklabels=True, showgrid=False, zeroline=False, showline=False, side='bottom', tickfont_size=16)
fig.update_yaxes(title=None, showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_layout(shapes=[dict(type= 'line', yref= 'paper', y0= -0.05, y1= 0.95, xref= 'x', x0= 2.5, x1= 2.5,
line=dict(color="gray", width=1, dash="dash"))])
for i in range(0, len(data003.Type.unique())):
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
arrowhead=0, xanchor='center', yanchor='middle',
ax=0, ay=0,
x=i,
y=-5,
text="Count: " + str(data0033.groupby('Type').sum().Count[i])), font_size=12, font_color = "gray")
fig.show()
# GOVERNMENT - HOSPITAL
data00111 = data0033[(data0033['OwnershipType'] == 'Government') & (data0033['FacilityType'] == 'Hospital')]
fig3 = px.bar(data00111, x='Type', y='Percent', orientation='v', color='Availability',
text='Pct', hover_name='Count', hover_data=None)
fig3.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""),
hoverlabel=dict(font_size=20),
title=("Availability of Beds & Mechanical Vents in <b>Government Facilities</b>"))
fig3.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig3.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig3.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig3.show()
# GOVERNMENT - INFIRMARIES
data00112 = data0033[(data0033['OwnershipType'] == 'Government') & (data0033['FacilityType'] == 'Infirmary')]
fig4 = px.bar(data00112, x='Type', y='Percent', orientation='v', color='Availability',
text='Pct', hover_name='Count', hover_data=None)
fig4.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""),
hoverlabel=dict(font_size=20),
title=("Availability of Beds & Mechanical Vents in <b>Private Facilities</b>"))
fig4.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig4.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig4.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig4.show()
# PRIVATE - HOSPITAL
data00221 = data0033[(data0033['OwnershipType'] == 'Private') & (data0033['FacilityType'] == 'Hospital')]
fig5 = px.bar(data00221, x='Type', y='Percent', orientation='v', color='Availability',
text='Pct', hover_name='Count', hover_data=None)
fig5.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""),
hoverlabel=dict(font_size=20),
title=("Availability of Beds & Mechanical Vents in <b>Government Facilities</b>"))
fig5.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig5.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig5.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig5.show()
# PRIVATE -INFIRMARIES
data00222 = data0033[(data0033['OwnershipType'] == 'Private') & (data0033['FacilityType'] == 'Infirmary')]
fig6 = px.bar(data00222, x='Type', y='Percent', orientation='v', color='Availability',
text='Pct', hover_name='Count', hover_data=None)
fig6.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""),
hoverlabel=dict(font_size=20),
title=("Availability of Beds & Mechanical Vents in <b>Private Facilities</b>"))
fig6.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig6.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig6.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig6.show()
# SUBPLOTS
fig000 = make_subplots(rows=2, cols=2, subplot_titles=("Government Hospitals", "Government Infirmaries",
"Private Hospitals", "Private Infirmaries"))
for loop in range(0, 2):
fig000.add_trace(fig3.data[loop], row=1, col=1)
fig000.add_trace(fig4.data[loop], row=1, col=2)
fig000.add_trace(fig5.data[loop], row=2, col=1)
fig000.add_trace(fig6.data[loop], row=2, col=2)
fig000.update_layout(barmode='stack',
xaxis1=dict(tickmode='linear', dtick=0, tickfont_size=12),
xaxis2=dict(tickmode='linear', dtick=0, tickfont_size=12))
fig000.update_xaxes(title=None, side='bottom', tickmode='array', ticktext=['a', 'b', 'c', 'd'])
fig000.update_yaxes(title=None, showticklabels=False, showgrid=False,
zeroline=False, showline=False)
fig000.update_layout(title=dict(x=0.5), showlegend=False, height=800,
legend=dict(orientation="h", x=-0.05, y=-0.4,
yanchor="bottom"), hoverlabel=dict(font_size=20),
xaxis_tickformat='<br>')
for i in range(0, len(data0033.Type.unique())):
fig000.add_annotation(dict(xref="x1", yref="y1", showarrow=False,
arrowhead=0, xanchor='center', yanchor='middle',
ax=0, ay=0, x=i, y=-5,
text="Count: " + str(int(fig3.data[0]['hovertext'][i]+fig3.data[1]['hovertext'][i])),
font_size=11, font_color = "gray"))
fig000.add_annotation(dict(xref="x2", yref="y2", showarrow=False,
arrowhead=0, xanchor='center', yanchor='middle',
ax=0, ay=0, x=i, y=-5,
text="Count: " + str(int(fig4.data[0]['hovertext'][i]+fig4.data[1]['hovertext'][i])),
font_size=11, font_color = "gray"))
fig000.add_annotation(dict(xref="x3", yref="y3", showarrow=False,
arrowhead=0, xanchor='center', yanchor='middle',
ax=0, ay=0, x=i, y=-5,
text="Count: " + str(int(fig5.data[0]['hovertext'][i]+fig5.data[1]['hovertext'][i])),
font_size=11, font_color = "gray"))
fig000.add_annotation(dict(xref="x4", yref="y4", showarrow=False,
arrowhead=0, xanchor='center', yanchor='middle',
ax=0, ay=0, x=i, y=-5,
text="Count: " + str(int(fig6.data[0]['hovertext'][i]+fig6.data[1]['hovertext'][i])),
font_size=11, font_color = "gray"))
fig000.show()
print("Note: The graphs above only shows utilization in facilities that have submitted to the DOHDataCollect App only.")
# df_ppe1 = df_ppe.sort_values(by=['hfhudcode', 'reportdate'], ascending=[True, False]).reset_index(drop=True)
# df_ppe2 = df_ppe1.groupby('hfhudcode').first().reset_index()
# df_ppe2
# print("Availability of PPEs in Different Hospitals:")
# print("- Coverall:", df_ppe2.coverall.sum())
# print("- Face Shield:", df_ppe2.face_shield.sum())
# print("- Goggles:", df_ppe2.goggles.sum())
# print("- Gown:", df_ppe2.gown.sum())
# print("- N95 Mask:", df_ppe2.n95mask.sum())
# print("- Shoe Cover:", df_ppe2.shoe_cover.sum())
# print(" ")
# print("- Head Cover:", df_ppe2.head_cover.sum())
# print("- Surgical Mask:", df_ppe2.surgmask.sum())
# print("- Gloves:", df_ppe2.gloves.sum())
Main Reference: Kevin Systrom
In any epidemic, $R_t$ is the measure known as the effective reproduction number. It can be simply defined as the number of people who can become infected per infectious person at time $t$. The most well-known version of this number is the basic reproduction number: $R_0$ when $t=0$. However, $R_0$ is a single measure that does not adapt with changes in behavior and restrictions.
As a pandemic evolves, increasing restrictions (or potential releasing of restrictions) changes $R_t$. Knowing the current $R_t$ is essential. When $R\gg1$, the pandemic will spread through a large part of the population. If $R_t$ $<1$, the pandemic will slow quickly before it has a chance to infect many people. The lower the $R_t$: the more manageable the situation.
Given this, what we want to see is $R_t$ of less than 1 which means that the pandemic in an area is under control.
def prepare_cases(cases):
new_cases = cases.diff()
smoothed = new_cases.rolling(7,
win_type='gaussian',
min_periods=1,
center=True).mean(std=2).round()
zeros = smoothed.index[smoothed.eq(0)]
if len(zeros) == 0:
idx_start = 0
else:
last_zero = zeros.max()
idx_start = smoothed.index.get_loc(last_zero) + 1
smoothed = smoothed.iloc[idx_start:]
original = new_cases.loc[smoothed.index]
return original, smoothed
Note that we will start the computation on March 5, 2020 - the date of the last zero new case. To derive the trend, Gaussian filter was applied to arrive with a smoothed time series. This will be the basis in calculating for each days' likelihood and posteriors as part of the Bayesian approach in estimating $R_t$. To know more about the methodology, you may check out the work of Kevin Systrom.
# cases = cases_daily.cumsum()
cases = cases_daily[35:].cumsum()
original, smoothed = prepare_cases(cases)
# original.plot(title=f"PH Daily New Cases",
# c='k',
# linestyle=':',
# alpha=.5,
# label='Actual',
# legend=True,
# figsize=(1200/72, 600/72))
# ax = smoothed.plot(label='Smoothed', color='#0084ff', legend=True)
# ax.get_figure().set_facecolor('w')
from scipy import stats as sps
def get_posteriors(sr, sigma=0.15):
# (1) Calculate Lambda
lam = sr[:-1].values * np.exp(GAMMA * (r_t_range[:, None] - 1))
# (2) Calculate each day's likelihood
likelihoods = pd.DataFrame(
data = sps.poisson.pmf(sr[1:].values, lam),
index = r_t_range,
columns = sr.index[1:])
# (3) Create the Gaussian Matrix
process_matrix = sps.norm(loc=r_t_range,
scale=sigma
).pdf(r_t_range[:, None])
# (3a) Normalize all rows to sum to 1
process_matrix /= process_matrix.sum(axis=0)
# (4) Calculate the initial prior
prior0 = sps.gamma(a=4).pdf(r_t_range)
prior0 /= prior0.sum()
# Create a DataFrame that will hold our posteriors for each day
# Insert our prior as the first posterior.
posteriors = pd.DataFrame(
index=r_t_range,
columns=sr.index,
data={sr.index[0]: prior0}
)
# We said we'd keep track of the sum of the log of the probability
# of the data for maximum likelihood calculation.
log_likelihood = 0.0
# (5) Iteratively apply Bayes' rule
for previous_day, current_day in zip(sr.index[:-1], sr.index[1:]):
#(5a) Calculate the new prior
current_prior = process_matrix @ posteriors[previous_day]
#(5b) Calculate the numerator of Bayes' Rule: P(k|R_t)P(R_t)
numerator = likelihoods[current_day] * current_prior
#(5c) Calcluate the denominator of Bayes' Rule P(k)
denominator = np.sum(numerator)
# Execute full Bayes' Rule
posteriors[current_day] = numerator/denominator
# Add to the running sum of log likelihoods
log_likelihood += np.log(denominator)
return posteriors, log_likelihood
# We create an array for every possible value of Rt
R_T_MAX = 12
r_t_range = np.linspace(0, R_T_MAX, R_T_MAX*100+1)
# Gamma is 1/serial interval
# https://wwwnc.cdc.gov/eid/article/26/7/20-0282_article
# https://www.nejm.org/doi/full/10.1056/NEJMoa2001316
GAMMA = 1/7
# Note that we're fixing sigma to a value just for the example
posteriors, log_likelihood = get_posteriors(smoothed, sigma=.15)
# ax = posteriors.plot(title=f'Daily Posterior for $R_t$',
# legend=False,
# lw=1, c='k', alpha=.3,
# figsize=(1200/72, 800/72),
# xlim=(0.4,4))
# ax.set_xlabel('$R_t$')
Next, since the results include uncertainty, we'd view the most likely value of $R_t$ along with its highest-density interval (HDI). HDI simply indicates which points of a distribution are most credible, and which cover most of the distribution. It can be compared to a confidence interval where an estimate will mostly like falls on a range given a probability ($p$). For this analysis, we will be defining the High HDI and Low HDI given $p=0.9$.
def highest_density_interval(pmf, p=.9):
# If we pass a DataFrame, just call this recursively on the columns
if(isinstance(pmf, pd.DataFrame)):
return pd.DataFrame([highest_density_interval(pmf[col], p=p) for col in pmf],
index=pmf.columns)
cumsum = np.cumsum(pmf.values)
best = None
for i, value in enumerate(cumsum):
for j, high_value in enumerate(cumsum[i+1:]):
if (high_value-value > p) and (not best or j<best[1]-best[0]):
best = (i, i+j+1)
break
low = pmf.index[best[0]]
high = pmf.index[best[1]]
return pd.Series([low, high], index=[f'Low_{p*100:.0f}', f'High_{p*100:.0f}'])
# Note that this takes a while to execute - it's not the most efficient algorithm
hdis = highest_density_interval(posteriors, p=.9)
most_likely = posteriors.idxmax().rename('Estimated Rt')
# Look into why you shift -1
result = pd.concat([most_likely, hdis], axis=1)
# result.tail()
# fig, ax = plt.subplots(figsize=(16, 8))
# most_likely.plot(marker='o',
# label='Most Likely',
# c='k',
# zorder=2,
# markersize=4,
# ax=ax)
# ax.fill_between(hdis.index,
# hdis['Low_90'],
# hdis['High_90'],
# color='k',
# zorder=1,
# alpha=.1,
# lw=0,
# label='Highest Density Interval')
# ax.axhline(1, linestyle='--')
# ax.axvline(most_likely.index[11], linewidth=2,
# zorder=0, color='#fa3c4c',
# label=f'Start of Lockdown: \n{str(most_likely.index[11].date())}')
# ax.axvline(most_likely.index[40], linewidth=2,
# zorder=0, color='#44bec7',
# label=f'Start of Progressive Mass Testing (?): \n{str(most_likely.index[40].date())}')
# ax.set_xlabel('Date', size=13)
# ax.set_ylabel('Effective Reproduction (Rt)', size=13)
# ax.set_title(f'$R_t$ by day', size=15, y=1.02)
# ax.legend();
fig = px.line(most_likely, y=most_likely.values, x=most_likely.index)
fig.update_xaxes(title='Date')
fig.update_yaxes(title='Effective Reproduction - R(t)')
fig.update_layout(title=dict(x=0.5, text='PH Estimated R(t) Based on New Daily Cases'),
showlegend=True,
legend=dict(orientation="v", title="",
x=0.8, y=0.95, yanchor="top"),
hoverlabel=dict(font_size=18))
fig.add_scatter(x=hdis['Low_90'].index, y=hdis['Low_90'].values,
fill='tonexty', line=dict(color='lightgray'), name='HDI (Low @ 90%)')
fig.add_scatter(x=hdis['High_90'].index, y=hdis['High_90'].values,
fill='tonexty', line=dict(color='lightgray'), name='HDI (High @ 90%)')
fig.add_scatter(y=most_likely.values, x=most_likely.index, line=dict(color='blue'), name='<b>R(t)</b>')
fig.add_scatter(y=[1]*most_likely.index.shape[0], x=most_likely.index,
line=dict(color='orange', dash='dash'), name='R(t) = 1')
fig.show()
# results_prov = pd.DataFrame()
# for i in topreg[:7]:
# df0 = df_regres_daily1[df_regres_daily1['ProvRes'] == i]
# df = df0[['DateRepConf', 'Cum_count']][df0.DateRepConf >= '2020-03-05']
# df1 = df.set_index('DateRepConf')
# df1 = df1.iloc[:,0].astype('int64')
# idx = pd.date_range('2020-03-05', df1.index[-1].date())
# df1.index = pd.DatetimeIndex(df1.index)
# df1 = df1.reindex(idx, fill_value=0)
# df1
# original, smoothed = prepare_cases(df1)
# posteriors, log_likelihood = get_posteriors(smoothed, sigma=.15)
# # Note that this takes a while to execute - it's not the most efficient algorithm
# hdis = highest_density_interval(posteriors, p=.9)
# most_likely = posteriors.idxmax().rename('Estimated Rt')
# # Look into why you shift -1
# result_prov = pd.concat([most_likely, hdis], axis=1)
# result_prov['ProvRes'] = i
# results_prov = results_prov.append(result_prov)
# fig000 = make_subplots(rows=4, cols=2, subplot_titles=topreg[:8])
# for i in topreg[:8][::2]: # EVEN NUMBER
# fig = px.line(results_prov[results_prov.ProvRes == i]['Estimated Rt'],
# y=results_prov[results_prov.ProvRes == i]['Estimated Rt'].values,
# x=results_prov[results_prov.ProvRes == i]['Estimated Rt'].index)
# fig.update_xaxes(title='Date')
# fig.update_yaxes(title='Effective Reproduction - R(t)')
# fig000.add_trace(fig.data[0], row=topreg[:8][::2].index(i)+1, col=1)
# fig000.add_scatter(x=results_prov[results_prov.ProvRes == i]['High_90'].index,
# y=results_prov[results_prov.ProvRes == i]['High_90'].values,
# fill='tonexty', fillcolor='lightgray', line=dict(color='lightgray'), name='HDI - High 90%',
# row=topreg[:8][::2].index(i)+1, col=1)
# fig000.add_scatter(x=results_prov[results_prov.ProvRes == i]['Low_90'].index,
# y=results_prov[results_prov.ProvRes == i]['Low_90'].values,
# fill='tonexty', fillcolor='lightgray', line=dict(color='lightgray'), name='HDI - Low 90%',
# row=topreg[:8][::2].index(i)+1, col=1)
# fig000.add_scatter(y=results_prov[results_prov.ProvRes == i]['Estimated Rt'].values,
# x=results_prov[results_prov.ProvRes == i]['Estimated Rt'].index,
# line=dict(color='blue'), name='R(t)',
# row=topreg[:8][::2].index(i)+1, col=1)
# fig000.add_scatter(y=[1]*results_prov[results_prov.ProvRes == i]['Estimated Rt'].index.shape[0],
# x=results_prov[results_prov.ProvRes == i]['Estimated Rt'].index,
# line=dict(color='orange', dash='dash'), name='R(t)',
# row=topreg[:8][::2].index(i)+1, col=1)
# for i in topreg[:8][1::2]: # ODD NUMBER
# fig = px.line(results_prov[results_prov.ProvRes == i]['Estimated Rt'],
# y=results_prov[results_prov.ProvRes == i]['Estimated Rt'].values,
# x=results_prov[results_prov.ProvRes == i]['Estimated Rt'].index)
# fig.update_xaxes(title='Date')
# fig.update_yaxes(title='Effective Reproduction - R(t)')
# fig000.add_trace(fig.data[0], row=topreg[:8][1::2].index(i)+1, col=2)
# fig000.add_scatter(x=results_prov[results_prov.ProvRes == i]['High_90'].index,
# y=results_prov[results_prov.ProvRes == i]['High_90'].values,
# fill='tonexty', fillcolor='lightgray', line=dict(color='lightgray'), name='HDI - High 90%',
# row=topreg[:8][1::2].index(i)+1, col=2)
# fig000.add_scatter(x=results_prov[results_prov.ProvRes == i]['Low_90'].index,
# y=results_prov[results_prov.ProvRes == i]['Low_90'].values,
# fill='tonexty', fillcolor='lightgray', line=dict(color='lightgray'), name='HDI - Low 90%',
# row=topreg[:8][1::2].index(i)+1, col=2)
# fig000.add_scatter(y=results_prov[results_prov.ProvRes == i]['Estimated Rt'].values,
# x=results_prov[results_prov.ProvRes == i]['Estimated Rt'].index,
# line=dict(color='blue'), name='R(t)',
# row=topreg[:8][1::2].index(i)+1, col=2)
# fig000.add_scatter(y=[1]*results_prov[results_prov.ProvRes == i]['Estimated Rt'].index.shape[0],
# x=results_prov[results_prov.ProvRes == i]['Estimated Rt'].index,
# line=dict(color='orange', dash='dash'), name='R(t)',
# row=topreg[:8][1::2].index(i)+1, col=2)
# fig000.update_xaxes(tickfont_size=10, title="Date", titlefont_size=10)
# fig000.update_yaxes(tickfont_size=10, title="R(t)", titlefont_size=10)
# fig000.update_layout(title=dict(x=0.5, text="Estimated R(t) by Provinces With Most Cases"),
# showlegend=False, height=1600,
# legend=dict(orientation="h", x=-0.05, y=-0.4, yanchor="bottom"),
# hoverlabel=dict(font_size=18))
# fig000.show()
# def prepare_cases(cases):
# new_cases = cases.diff()
# smoothed = new_cases.rolling(7,
# win_type='gaussian',
# min_periods=1,
# center=True).mean(std=2).round()
# zeros = smoothed.index[smoothed.eq(0)]
# if len(zeros) == 0:
# idx_start = 0
# else:
# last_zero = zeros.max()
# idx_start = smoothed.index.get_loc(last_zero) + 1
# smoothed = smoothed.iloc[idx_start:]
# original = new_cases.loc[smoothed.index]
# return original, smoothed
# results_ncr = pd.DataFrame()
# for i in topcity[:-4]:
# df0 = df_city_daily1[df_city_daily1['CityMunRes'] == i]
# df = df0[['DateRepConf', 'Cum_count']][df0.DateRepConf >= '2020-03-05']
# df1 = df.set_index('DateRepConf')
# df1 = df1.iloc[:,0].astype('int64')
# idx = pd.date_range('2020-03-05', df1.index[-1].date())
# df1.index = pd.DatetimeIndex(df1.index)
# df1 = df1.reindex(idx, fill_value=0)
# df1
# original, smoothed = prepare_cases(df1)
# posteriors, log_likelihood = get_posteriors(smoothed, sigma=.15)
# # Note that this takes a while to execute - it's not the most efficient algorithm
# hdis = highest_density_interval(posteriors, p=.9)
# most_likely = posteriors.idxmax().rename('Estimated Rt')
# # Look into why you shift -1
# result_ncr = pd.concat([most_likely, hdis], axis=1)
# result_ncr['CityMunRes'] = i
# results_ncr = results_ncr.append(result_ncr)
# fig000 = make_subplots(rows=7, cols=2, subplot_titles=topcity[:-3])
# for i in topcity[:-3][::2]: # EVEN NUMBER
# fig = px.line(results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'],
# y=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].values,
# x=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index)
# fig.update_xaxes(title='Date')
# fig.update_yaxes(title='Effective Reproduction - R(t)')
# fig000.add_trace(fig.data[0], row=topcity[:-3][::2].index(i)+1, col=1)
# fig000.add_scatter(x=results_ncr[results_ncr.CityMunRes == i]['High_90'].index,
# y=results_ncr[results_ncr.CityMunRes == i]['High_90'].values,
# fill='tonexty', fillcolor='lightgray', line=dict(color='lightgray'), name='HDI - High 90%',
# row=topcity[:-3][::2].index(i)+1, col=1)
# fig000.add_scatter(x=results_ncr[results_ncr.CityMunRes == i]['Low_90'].index,
# y=results_ncr[results_ncr.CityMunRes == i]['Low_90'].values,
# fill='tonexty', fillcolor='lightgray', line=dict(color='lightgray'), name='HDI - Low 90%',
# row=topcity[:-3][::2].index(i)+1, col=1)
# fig000.add_scatter(y=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].values,
# x=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index,
# line=dict(color='blue'), name='R(t)',
# row=topcity[:-3][::2].index(i)+1, col=1)
# fig000.add_scatter(y=[1]*results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index.shape[0],
# x=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index,
# line=dict(color='orange', dash='dash'), name='R(t)',
# row=topcity[:-3][::2].index(i)+1, col=1)
# for i in topcity[:-3][1::2]: # EVEN NUMBER
# fig = px.line(results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'],
# y=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].values,
# x=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index)
# fig.update_xaxes(title='Date')
# fig.update_yaxes(title='Effective Reproduction - R(t)')
# fig000.add_trace(fig.data[0], row=topcity[:-3][1::2].index(i)+1, col=2)
# fig000.add_scatter(x=results_ncr[results_ncr.CityMunRes == i]['High_90'].index,
# y=results_ncr[results_ncr.CityMunRes == i]['High_90'].values,
# fill='tonexty', fillcolor='lightgray', line=dict(color='lightgray'), name='HDI - High 90%',
# row=topcity[:-3][1::2].index(i)+1, col=2)
# fig000.add_scatter(x=results_ncr[results_ncr.CityMunRes == i]['Low_90'].index,
# y=results_ncr[results_ncr.CityMunRes == i]['Low_90'].values,
# fill='tonexty', fillcolor='lightgray', line=dict(color='lightgray'), name='HDI - Low 90%',
# row=topcity[:-3][1::2].index(i)+1, col=2)
# fig000.add_scatter(y=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].values,
# x=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index,
# line=dict(color='blue'), name='R(t)',
# row=topcity[:-3][1::2].index(i)+1, col=2)
# fig000.add_scatter(y=[1]*results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index.shape[0],
# x=results_ncr[results_ncr.CityMunRes == i]['Estimated Rt'].index,
# line=dict(color='orange', dash='dash'), name='R(t)',
# row=topcity[:-3][1::2].index(i)+1, col=2)
# fig000.update_xaxes(tickfont_size=10, title="Date", titlefont_size=10)
# fig000.update_yaxes(tickfont_size=10, title="R(t)", titlefont_size=10)
# fig000.update_layout(title=dict(x=0.5, text="Estimated R(t) by NCR Cities"),
# showlegend=False, height=3200,
# legend=dict(orientation="h", x=-0.05, y=-0.4, yanchor="bottom"),
# hoverlabel=dict(font_size=18))
# fig000.show()
# print("Note: Malabon, Navatos and Pateros can't be estimated well due to small number of new cases")
cc2 = cc[31:].reset_index(drop=True) # March 1, 2020
cc2['Date'] = cc2['Date'].dt.date
cc2['Count_rmean_7days'] = cc2.Count.rolling(7).mean()
cc2['Cum_count_rmean_7days'] = cc2.Count.cumsum().rolling(7).mean()
fig = px.line(cc2, x='Cum_count_rmean_7days', y='Count_rmean_7days',
hover_name='Date', title='New Cases vs Total Cases (7-day Rolling Average)',
log_x=True, log_y=True)
fig.update_xaxes(title="log(Total Confirmed Cases)")
fig.update_yaxes(title="log(New Confirmed Cases)")
fig.show()
The growth started to slow down during the first week of April & currently moving (fluctuating) sideways. Yes, it started to plateau but what we need is a downtrend / sudden drop to achieve a "flattening curve".
Doubling time is the defined as the length of time for the present value of variable to double given the prevailing rate of increase. It is based on the compound accumulation model. Here's the formula for doubling time ${d_{t,y}}$:
Source: PJ Cayton and JG Sarmiento
# "Epidemic doubling times characterize the sequence of intervals at which the cumulative incidence doubles (3). If an epidemic is growing exponentially with a constant growth rate ${r}$, the doubling time remains constant and equals $\frac{ln 2}{r}$. An increase in the doubling time indicates a slowdown in transmission if the underlying reporting rate remains unchanged."
# Source: https://wwwnc.cdc.gov/eid/article/26/8/20-0219_article
# To calculate the doubling time for a population (e.g. COVID-19 cases), use the formula = $\frac{x ln(2)}{ln \frac{y}{z}$, where
# - x is the time that has passed since you started measuring. For example, if the number of cases went from 500 on day 0 to 1000 on day 2, x is 2.
# - y is the number of cases on day x, e.g 1000 on day 2.
# - z is the number of cases on day 0, e.g. 500."
# Source: https://blog.datawrapper.de/weekly-chart-coronavirus-doublingtimes/
# # version1
# c2 = c1.copy()
# c2['doubling_time1'] = np.log(2) / (c2['%Change']/100)
# c2['doubling_time2'] = np.log(2) / (c2['trend']/100)
# # version2
# c2 = c1.copy()
# c2['doubling_time1'] = np.log10(2) / (c2['%Change']/100)
# c2['doubling_time2'] = np.log10(2) / (c2['trend']/100)
# version3
c2 = c1.copy()
c2['doubling_time1'] = np.log(2) / np.log(1 + (c2['%Change']/100))
c2['doubling_time2'] = np.log(2) / np.log(1 + (c2['trend']/100))
# version3.1
# c2['doubling_time3'] = c2['doubling_time1'].rolling(7).mean()
# cyclex, trendx = sm.tsa.filters.hpfilter(c2['doubling_time1'], 7)
# c2['doubling_time4'] = trendx
c2['Days'] = 'Doubling Time'
c2['Trend_name1'] = 'Trend (based on HP Filter)'
fig_trend1 = px.line(c2, x='Date', y='doubling_time2', color='Trend_name1',
color_discrete_sequence=["cadetblue"],
labels={'doubling_time2': 'Days-To-Double'})
# c2['Trend_name2'] = 'Trend (based on 7-day rolling mean)'
# fig_trend2 = px.line(c2, x='Date', y='doubling_time4', color='Trend_name2',
# color_discrete_sequence=["cadetblue"])
fig2 = px.line(c2, x='Date', y='doubling_time1', color='Days',
labels={'doubling_time1': 'Days-To-Double'},
color_discrete_sequence=["lightblue"])
fig2.add_trace(fig_trend1.data[0].update(line={'dash': 'dash'}))
# fig2.add_trace(fig_trend2.data[0].update(line={'dash': 'dashdot'}))
fig2.update_yaxes(title="Days-to-Double", showgrid=True, tickfont_size=15)
fig2.update_layout(title=dict(x=0.5, text="Doubling Time in Confirmed Cases for PH"),
xaxis=dict(tickmode='array', #tick0=4, dtick=0,
tickfont_size=9, title="Date", showgrid=False),
legend=dict(orientation="v", title="",
x=0.1, y=0.85, yanchor="bottom"),
hoverlabel=dict(font_size=20))
fig2.update_layout(shapes=[dict(type='line',
yref='y', y0=30, y1=30,
xref='paper', x0=0, x1=1,
line=dict(color="orange", width=1))])
fig2.show()
d2 = d1.copy()
d2['doubling_time1'] = np.log(2) / (d2['%Change']/100)
d2['doubling_time2'] = np.log(2) / (d2['trend']/100)
d2['Days'] = 'Doubling Time'
d2['Trend_name1'] = 'Trend (based on HP Filter)'
fig_trend = px.line(d2, x='Date', y='doubling_time2', color='Trend_name1',
labels={'doubling_time2': 'Days-To-Double'},
color_discrete_sequence=["tomato"])
fig2 = px.line(d2, x='Date', y='doubling_time1', color='Days',
labels={'doubling_time1': 'Days-To-Double'},
color_discrete_sequence=["lightpink"])
fig2.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}))
fig2.update_yaxes(title="Days-to-Double", showgrid=True, tickfont_size=15)
fig2.update_layout(title=dict(x=0.5, text="Doubling Time in Death Cases for PH"),
xaxis=dict(tickmode='array', #tick0=4, dtick=0,
tickfont_size=9, title="Date", showgrid=False),
legend=dict(orientation="v", title="",
x=0.05, y=0.85, yanchor="bottom"),
hoverlabel=dict(font_size=20))
fig2.update_layout(shapes=[dict(type= 'line',
yref= 'y', y0=30, y1=30,
xref= 'paper', x0=0, x1=1,
line=dict(color="orange", width=1))])
fig2.show()
# version3
r2 = r1.copy()
r2['doubling_time1'] = np.log(2) / np.log(1 + (r2['%Change']/100))
r2['doubling_time2'] = np.log(2) / np.log(1 + (r2['trend']/100))
r2['Days'] = 'Doubling Time'
r2['Trend_name1'] = 'Trend (based on HP Filter)'
fig_trend = px.line(r2, x='Date', y='doubling_time2', color='Trend_name1',
labels={'doubling_time2': 'Days-To-Double'},
color_discrete_sequence=["limegreen"])
fig2 = px.line(r2, x='Date', y='doubling_time1', color='Days',
labels={'doubling_time1': 'Days-To-Double'},
color_discrete_sequence=["lightgreen"])
fig2.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}))
fig2.update_yaxes(title="Days-to-Double", showgrid=True, tickfont_size=15)
fig2.update_layout(title=dict(x=0.5, text="Doubling Time in Recovered Cases for PH"),
xaxis=dict(tickmode='array', #tick0=4, dtick=0,
tickfont_size=9, title="Date", showgrid=False),
legend=dict(orientation="v", title="",
x=0.05, y=0.85, yanchor="bottom"),
hoverlabel=dict(font_size=20))
# fig2.update_layout(shapes=[dict(type= 'line',
# yref= 'y', y0=30, y1=30,
# xref= 'paper', x0=0, x1=1,
# line=dict(color="orange", width=1))])
fig2.show()
Data Source: https://www.worldometers.info/coronavirus/
print("WORLDOMETER Last download: ", pd.to_datetime('today').strftime("%b %d, %Y %I:%M:%S"))
url = 'https://www.worldometers.info/coronavirus/'
html = requests.get(url).content
df_list = pd.read_html(html)
df = df_list[-1].iloc[1:-1,1:]
def human_format(num):
num = float('{:.3g}'.format(num))
magnitude = 0
while abs(num) >= 1000:
magnitude += 1
num /= 1000.0
return '{}{}'.format('{:f}'.format(num).rstrip('0').rstrip('.'), ['', 'K', 'M', 'B', 'T'][magnitude])
asean = ['Philippines', 'Indonesia', 'Malaysia', 'Singapore', 'Thailand',
'Brunei', 'Laos', 'Cambodia', 'Vietnam', 'Burma', 'Timor Leste']
df_asean = df[df['Country,Other'].isin(asean)].reset_index(drop=True)
df_asean['Pop'] = df_asean['Population'].apply(lambda x : human_format(x))
df_asean['%Active'] = round(df_asean['ActiveCases'] / df_asean['TotalCases']*100, 1)
df_asean['%Death'] = round(df_asean['TotalDeaths'] / df_asean['TotalCases']*100, 1)
df_asean['%Recovered'] = round(df_asean['TotalRecovered'] / df_asean['TotalCases']*100,1)
figq = px.bar(df_asean[::-1], x='TotalCases', y='Country,Other',
orientation='h', text='TotalCases', hover_data=['Pop'],
labels={'Country,Other': 'Country'}, #hover_data=['ActiveCases'],
color_discrete_sequence=[px.colors.qualitative.Plotly[0]])
figq.update_traces(textposition='outside', textfont_size=12)
figq.update_yaxes(tickfont_size=15)
figq.update_xaxes(range=(0, df_asean['TotalCases'].max()*1.1))
figq.update_layout(xaxis=dict(tickfont_size=9, title='Number of Cases'),
yaxis=dict(title=None),
title=dict(x=0.5, text="Covid-19 Confirmed Cases in ASEAN Region"),
# legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"),
hoverlabel=dict(font_size=20))
figq.show()
#############################################################################
#############################################################################
#############################################################################
figA = px.bar(df_asean[::-1], x='%Active', y='Country,Other',
orientation='h', text='%Active',
labels={'Country,Other': 'Country'}, hover_data=['ActiveCases'],
color_discrete_sequence=[px.colors.qualitative.Plotly[0]])
figA.update_traces(textposition='outside', textfont_size=12)
figA.update_layout(xaxis=dict(tickfont_size=9),
#title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
#legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"),
hoverlabel=dict(font_size=20))
# figA.show()
figB = px.bar(df_asean[::-1], x='%Death', y='Country,Other',
orientation='h', text='%Death',
labels={'Country,Other': 'Country'}, hover_data=['TotalDeaths'],
color_discrete_sequence=[px.colors.qualitative.Plotly[1]])
figB.update_traces(textposition='outside', textfont_size=12)
figB.update_layout(xaxis=dict(tickfont_size=9),
#title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
#legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"),
hoverlabel=dict(font_size=20))
# figA.show()
figC = px.bar(df_asean[::-1], x='%Recovered', y='Country,Other',
orientation='h', text='%Recovered',
labels={'Country,Other': 'Country'}, hover_data=['TotalRecovered'],
color_discrete_sequence=[px.colors.qualitative.Plotly[2]])
figC.update_traces(textposition='outside', textfont_size=12)
figC.update_layout(xaxis=dict(tickfont_size=9),
#title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
#legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"),
hoverlabel=dict(font_size=20))
# figC.show()
##############################################################################
fig = px.bar(df_asean[::-1], x='Tot\xa0Cases/1M pop', y='Country,Other',
orientation='h', text='Tot\xa0Cases/1M pop', labels={'Country,Other': 'Country'},
hover_name='Tot\xa0Cases/1M pop', hover_data=['TotalCases', 'Pop'],
color_discrete_sequence=[px.colors.qualitative.Plotly[0]])
fig.update_traces(textposition='outside', textfont_size=12)
fig.update_layout(xaxis=dict(tickfont_size=9),
#title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
#legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"),
hoverlabel=dict(font_size=20))
# fig.show()
fig1 = px.bar(df_asean[::-1], x='Deaths/1M pop', y='Country,Other',
orientation='h', text='Deaths/1M pop', labels={'Country,Other': 'Country'},
hover_name='Deaths/1M pop', hover_data=['TotalDeaths', 'Pop'],
color_discrete_sequence=[px.colors.qualitative.Plotly[1]])
fig1.update_traces(textposition='outside', textfont_size=12)
fig1.update_layout(xaxis=dict(tickfont_size=9),
#title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
#legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"),
hoverlabel=dict(font_size=20))
#fig1.show()
fig2 = px.bar(df_asean[::-1], x='Tests/ 1M pop', y='Country,Other',
orientation='h', text='Tests/ 1M pop', labels={'Country,Other': 'Country'},
hover_name='Tests/ 1M pop', hover_data=['TotalTests', 'Pop'],
color_discrete_sequence=[px.colors.qualitative.Plotly[2]])
fig2.update_traces(textposition='outside', textfont_size=12)
fig2.update_layout(xaxis=dict(tickfont_size=9),
#title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
#legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"),
hoverlabel=dict(font_size=20))
#fig2.show()
##############################################################################
##############################################################################
##############################################################################
fig000 = make_subplots(rows=2, cols=3, subplot_titles=("%Active Cases", "%Deaths", "%Recovered",
"Cases per 1M Pop'n", "Deaths per 1M Pop'n", "Test per 1M Pop'n"))
# FigA
fig000.add_trace(figA.data[0], row=1, col=1)
fig000.update_yaxes(tickfont_size=15, row=1, col=1)
fig000.update_xaxes(range=(0, df_asean['%Active'].max()*1.25), title='Percent(%)', row=1, col=1)
# fig000.update_xaxes(range=(0, 120), row=1, col=1)
# FigB
fig000.add_trace(figB.data[0], row=1, col=2)
fig000.update_yaxes(showticklabels=False, row=1, col=2)
fig000.update_xaxes(range=(0, df_asean['%Death'].max()*1.5), title='Percent(%)', row=1, col=2)
# fig000.update_xaxes(range=(0, 120), row=1, col=2)
# FigC
fig000.add_trace(figC.data[0], row=1, col=3)
fig000.update_yaxes(showticklabels=False, row=1, col=3)
fig000.update_xaxes(range=(0, df_asean['%Recovered'].max()*1.25), title='Percent(%)', row=1, col=3)
# fig000.update_xaxes(range=(0, 120), row=1, col=3)
##############################################################################
# Fig
fig000.add_trace(fig.data[0], row=2, col=1)
fig000.update_yaxes(tickfont_size=15, row=2, col=1)
fig000.update_xaxes(range=(0, df_asean['Tot\xa0Cases/1M pop'].max()*1.25), title='Count', row=2, col=1)
# Fig1
fig000.add_trace(fig1.data[0], row=2, col=2)
fig000.update_yaxes(showticklabels=False, row=2, col=2)
fig000.update_xaxes(range=(0, df_asean['Deaths/1M pop'].max()*1.25), title='Count', row=2, col=2)
# Fig2
fig000.add_trace(fig2.data[0], row=2, col=3)
fig000.update_yaxes(showticklabels=False, row=2, col=3)
fig000.update_xaxes(range=(0, df_asean['Tests/ 1M pop'].max()*1.25), title='Count', row=2, col=3)
fig000.update_layout(hoverlabel=dict(font_size=20), height=1000)
fig000.show()
Data Source: COVID-19 Data Repository by Johns Hopkins University
SIR model is a kind of compartmental model describing the dynamics of infectious disease. You may wonder why it is called the “compartmental model.” The model divides the population into compartments. Each compartment is expected to have the same characteristics. SIR represents the three compartments segmented by the model.
Source: https://www.lewuathe.com/covid-19-dynamics-with-sir-model.html
#!/usr/bin/python
import numpy as np
import pandas as pd
from csv import reader
from csv import writer
from scipy.integrate import solve_ivp
from scipy.optimize import minimize
import matplotlib.pyplot as plt
from datetime import timedelta, datetime
import argparse
import sys
import json
import ssl
import urllib.request
# DATA FROM JHU SSE
df_confirmed = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
df_deaths = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
df_recovered = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
country_df_confirmed = df_confirmed[df_confirmed['Country/Region'] == 'Philippines']
country_df_deaths = df_deaths[df_deaths['Country/Region'] == 'Philippines']
country_df_recovered = df_recovered[df_recovered['Country/Region'] == 'Philippines']
start_date = '2/1/20' # First Case
recovered = country_df_recovered.iloc[0].loc[start_date:]
death = country_df_deaths.iloc[0].loc[start_date:]
confirmed = country_df_confirmed.iloc[0].loc[start_date:]
data = confirmed - recovered - death
First, let's set the initial parameters. For the sake of this modeling effort, we will just play with estimates.
Let's say $S_0$ = 100000 which corresponds to the initial susceptible. Meanwhile, we will set $I_0$ = 1 (initial infected) and $R_0$ = 0 (initial recovered), same as the PH statistics we have as of January 30, 2020.
s_0 = 100000 # Initial Susceptible; Setting the whole population in the country is not realistic for sure.
i_0 = 1 # country_df_confirmed.iloc[0].loc[start_date:][0] # 1, Initial Infected
r_0 = 0 # country_df_recovered.iloc[0].loc[start_date:][0] # 0, Initial Recovered
dayforecast = 60 # next 30 days
values = data.index
current = datetime.strptime(data.index[-1], '%m/%d/%y')
new_size = len(values)+dayforecast
print('Actual data as of:', values[-1])
while len(values) < new_size:
current = current + timedelta(days=1)
values = np.append(values, datetime.strftime(current, '%m/%d/%y'))
new_index = values
size = len(new_index)
print('Forecast until', '(+'+str(dayforecast)+' days):', new_index[-1])
# loss Function based on root MSE
def loss(point, data, recovered, s_0, i_0, r_0):
size = len(data)
beta, gamma = point
def SIR(t, y):
S = y[0]
I = y[1]
R = y[2]
return [-beta*S*I, beta*S*I-gamma*I, gamma*I]
solution = solve_ivp(SIR, [0, size], [s_0, i_0, r_0], t_eval=np.arange(0, size, 1), vectorized=True)
l1 = np.sqrt(np.mean((solution.y[1] - data)**2))
l2 = np.sqrt(np.mean((solution.y[2] - recovered)**2))
alpha = 0.1
return alpha * l1 + (1 - alpha) * l2
# SIR Model
def SIR(t, y):
S = y[0]
I = y[1]
R = y[2]
return [-beta*S*I, beta*S*I-gamma*I, gamma*I]
Next, find the values of beta (β) and gamma (γ) that give the smallest residual sum of squares (RSS), which represents the best fit to the data. β is the transmissivity rate or the parameter controlling how much the disease can be transmitted through exposure. It is determined by the chance of contact and the probability of disease transmission. γ is the recovery rate or the parameter expressing how much the disease can be recovered in a specific period.
# Optimizate to estimate the beta and gamma fitting the given confirmed cases
optimal = minimize(loss, [0.001, 0.001], args=(data, recovered, s_0, i_0, r_0),
method='L-BFGS-B', bounds=[(0.00000001, 0.4), (0.00000001, 0.4)])
print(optimal)
beta, gamma = optimal.x
print('β: ', beta)
print('γ: ', gamma)
# Predict Future Values Based on SIR Model
new_index = values
size = len(new_index)
extended_actual = np.concatenate((data.values, [None] * (size - len(data.values))))
extended_recovered = np.concatenate((recovered.values, [None] * (size - len(recovered.values))))
extended_death = np.concatenate((death.values, [None] * (size - len(death.values))))
prediction = solve_ivp(SIR, [0, size], [s_0, i_0, r_0], t_eval=np.arange(0, size, 1))
# Plot the graph
df = pd.DataFrame({'Active Cases (Infected)': extended_actual, 'Recovered Cases': extended_recovered, 'Death Cases': extended_death,
'Susceptible': prediction.y[0], 'Infected': prediction.y[1], 'Recovered': prediction.y[2]}, index=new_index)
df.index = pd.to_datetime(df.index)
df['AC'] = df.columns[0]
df['RC'] = df.columns[1]
df['DC'] = df.columns[2]
df['S'] = df.columns[3]
df['I'] = df.columns[4]
df['R'] = df.columns[5]
fig = px.line(df, x=df.index, y=df.columns[0], color='AC')
fig.add_trace(px.line(df, x=df.index, y=df.columns[1], color='RC', color_discrete_sequence=['green']).data[0])
fig.add_trace(px.line(df, x=df.index, y=df.columns[2], color='DC', color_discrete_sequence=['red']).data[0])
# Forecast
fig.add_trace(px.line(df, x=df.index, y=df.columns[3], color='S', color_discrete_sequence=['orange']).data[0].update(line={'dash': 'dash'}))
fig.add_trace(px.line(df, x=df.index, y=df.columns[4], color='I', color_discrete_sequence=['blue']).data[0].update(line={'dash': 'dash'}))
fig.add_trace(px.line(df, x=df.index, y=df.columns[5], color='R', color_discrete_sequence=['green']).data[0].update(line={'dash': 'dash'}))
fig.update_layout(title=dict(x=0.5, text="SIR Model"),
xaxis=dict(tickmode='array', tickfont_size=12, title="Date", showgrid=False),
yaxis=dict(title='Cases'),
legend=dict(orientation="v", title="", x=0.02, y=0.1, yanchor="bottom"),
hoverlabel=dict(font_size=20))
fig.show()
In a pandemic, the number of cases is expected to have an exponential growth but eventually, it will reach a certain limit given the efforts of the government / health sectors in preventing the spread of the disease. Considering this, we can imagine an "S" curve or in mathematical terms, it's a logistic curve. Therefore, a good way to model is to fit our data in a logistic curve to get better forecast. Thankfully, there is a Python library that we can use.
Prophet is a forecasting procedure developed and released by Facebook’s Core Data Science team. Aside from the typical linear growth time series modeling, it can also handle logistic growth.
import pandas as pd
import numpy as np
from fbprophet import Prophet
import pickle
import math
import scipy.optimize as optimize
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import logging
logging.getLogger('fbprophet').setLevel(logging.WARNING)
#cases_daily = pd.read_excel("./data/cases_daily.xlsx")
cases_daily = confirmed.diff().reset_index().rename({'index': 'DateRepConf', 182: 'CaseCode'}, axis=1)
data = cases_daily.set_index('DateRepConf')
data.index = pd.to_datetime(data.index) ######################################
idx = pd.date_range(data.index[0].date(), data.index[-1].date())
data.index = pd.DatetimeIndex(data.index)
data = data.reindex(idx, fill_value=0)
data=data.cumsum()
data = data.reset_index(drop=False)
data.columns = ['Timestep', 'Total Cases']
data['Total Cases'] = data['Total Cases'].fillna(0).astype('int64') ##########
The challenge now is to define the "maximum carrying capacity" as required by the logistic growth function of the Prophet tool. Supposedly, this is a known variable but in our case, we really don't know our ceiling (for the 1st wave at least). To make a logical way to define this, let's derive the capacity based on the smallest mean square error (MSE) from the last 30 days of available data. Meaning, we will compute the best fit given a certain range of capacity or ceiling.
lalala = pd.DataFrame()
for cap in list(range(round(data['Total Cases'].tail(1).values[0],-2), 21000, 500)):
df = data.copy()
df['cap'] = cap
df.columns = ['ds', 'y', 'cap']
# df.columns = ['ds', 'y']
df
# STEP 1: Model Fitting
m = Prophet(growth="logistic", interval_width=0.90)
m.fit(df)
# STEP 2: Specify the number of days to extend into the future.
forecast_period = 14 # next two weeks
future = m.make_future_dataframe(periods=forecast_period)
future['cap'] = df['cap'].iloc[0]
# STEP 3: Predict the future
forecast = m.predict(future)
from sklearn.metrics import mean_squared_error
# Actual data (Y) on Last 30 days
Y_true = data['Total Cases'][-7:].tolist() # Y_true = Y (original values)
# Calculated values (Yhat) on the last 30 days
Y_pred = forecast['yhat'][:-forecast_period][-7:].tolist() # Y_pred = Y'
# Calculation of Mean Squared Error (MSE)
mse = mean_squared_error(Y_true,Y_pred)
lalala = lalala.append(pd.DataFrame([[cap, mse]], columns=['Capacity','Mean Square Error']))
fig = px.line(lalala, x='Capacity', y='Mean Square Error')
smallest_mse = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
fig.add_annotation(dict(xref="x", yref="y", showarrow=True,
arrowhead=2, xanchor='center', yanchor='bottom',
ax=0, ay=-100,
x=smallest_mse, y=lalala['Mean Square Error'].min(),
text="Smallest MSE : <br> Capacity = " + str(smallest_mse), font_size=18, font_color = 'blue'))
fig.update_layout(title="MSE (Based on the last 30 days) for Each Capacity")
fig.show()
Now, we can proceed with the modeling given the capacity computed above. We will also add our forecast based on this on the next 30 days. To consider the uncertainties, we will be defining an interval width at 90% confidence.
# STEP 0: Capacity computed based on MSE
df = data.copy()
df['cap'] = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
df.columns = ['ds', 'y', 'cap']
# df.columns = ['ds', 'y']
df
# STEP 1: Model Fitting
m = Prophet(growth="logistic", interval_width=0.90)
m.fit(df)
# STEP 2: Specify the number of days to extend into the future.
future = m.make_future_dataframe(periods=30)
future['cap'] = df['cap'].iloc[0]
# STEP 3: Predict the future
forecast = m.predict(future)
from fbprophet.plot import plot_plotly
import plotly.offline as py
py.init_notebook_mode()
fig = plot_plotly(m, forecast, xlabel="Date", ylabel="Case Count", figsize=(1000, 700))
fig.update_layout(title="PH Forecast (Next 30 days) on Total Confirmed Cases", hoverlabel=dict(font_size=20))
fig.update_traces(line=dict(color='blue'))
fig.show()
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
metric_df = forecast.set_index('ds')[['yhat']].join(df.set_index('ds').y).reset_index()
metric_df.dropna(inplace=True)
print("------------ Accuracy ------------")
print("R-squared: ", round(r2_score(metric_df.y, metric_df.yhat),4)*100, '%')
print("Mean Squared Error: ", mean_squared_error(metric_df.y, metric_df.yhat))
print("Mean Absolute Error: ", mean_absolute_error(metric_df.y, metric_df.yhat))
deaths_daily = pd.read_excel("./data/deaths_daily.xlsx")
data = deaths_daily.set_index('DateRepRem')
data.index = pd.to_datetime(data.index) ######################################
# idx = pd.date_range(data.index[0].date(), data.index[-1].date())
idx = pd.date_range(data.index[0].date(), data.index[-1].date())
data.index = pd.DatetimeIndex(data.index)
data = data.reindex(idx, fill_value=0)
data=data.cumsum()
data = data.reset_index(drop=False)
data.columns = ['Timestep', 'Total Cases']
data['Total Cases'] = data['Total Cases'].fillna(0).astype('int64') ##########
lalala = pd.DataFrame()
for cap in list(range(round(data['Total Cases'].tail(1).values[0],-2), 1500, 10)):
df = data.copy()
df['cap'] = cap
df.columns = ['ds', 'y', 'cap']
# df.columns = ['ds', 'y']
df
# STEP 1: Model Fitting
m = Prophet(growth="logistic", interval_width=0.90)
m.fit(df)
# STEP 2: Specify the number of days to extend into the future.
future = m.make_future_dataframe(periods=14)
future['cap'] = df['cap'].iloc[0]
# STEP 3: Predict the future
forecast = m.predict(future)
from sklearn.metrics import mean_squared_error
# Actual data (Y) on Last 30 days
Y_true = data['Total Cases'][-30:].tolist() # Y_true = Y (original values)
# Calculated values (Yhat) on the last 30 days
Y_pred = forecast['yhat'][:-14][-30:].tolist() # Y_pred = Y'
# Calculation of Mean Squared Error (MSE)
mse = mean_squared_error(Y_true,Y_pred)
lalala = lalala.append(pd.DataFrame([[cap, mse]], columns=['Capacity','Mean Square Error']))
# fig = px.line(lalala, x='Capacity', y='Mean Square Error')
# smallest_mse = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True,
# arrowhead=2, xanchor='center', yanchor='bottom',
# ax=0, ay=-100,
# x=smallest_mse, y=lalala['Mean Square Error'].min(),
# text="Smallest MSE : <br> Capacity = " + str(smallest_mse), font_size=18, font_color = 'blue'))
# fig.update_layout(title="MSE (Based on the last 30 days) for Each Capacity")
# fig.show()
# STEP 0: Capacity computed based on MSE
df = data.copy()
df['cap'] = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
df.columns = ['ds', 'y', 'cap']
# df.columns = ['ds', 'y']
df
# STEP 1: Model Fitting
m = Prophet(growth="logistic", interval_width=0.90)
m.fit(df)
# STEP 2: Specify the number of days to extend into the future.
future = m.make_future_dataframe(periods=30)
future['cap'] = df['cap'].iloc[0]
# STEP 3: Predict the future
forecast = m.predict(future)
from fbprophet.plot import plot_plotly
import plotly.offline as py
py.init_notebook_mode()
fig = plot_plotly(m, forecast, xlabel="Date", ylabel="Case Count", figsize=(1000, 700))
fig.update_layout(title="PH Forecast (Next 30 days) on Death Cases", hoverlabel=dict(font_size=20))
fig.update_traces(line=dict(color='red'))
fig.show()
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
metric_df = forecast.set_index('ds')[['yhat']].join(df.set_index('ds').y).reset_index()
metric_df.dropna(inplace=True)
print("------------ Accuracy ------------")
print("R-squared: ", round(r2_score(metric_df.y, metric_df.yhat),4)*100, '%')
print("Mean Squared Error: ", mean_squared_error(metric_df.y, metric_df.yhat))
print("Mean Absolute Error: ", mean_absolute_error(metric_df.y, metric_df.yhat))
recoveries_daily = pd.read_excel("./data/recoveries_daily.xlsx")
data = recoveries_daily.set_index('DateRepRem')
data.index = pd.to_datetime(data.index) ######################################
# idx = pd.date_range(data.index[0].date(), data.index[-1].date())
idx = pd.date_range(data.index[0].date(), data.index[-1].date())
data.index = pd.DatetimeIndex(data.index)
data = data.reindex(idx, fill_value=0)
data=data.cumsum()
data = data.reset_index(drop=False)
data.columns = ['Timestep', 'Total Cases']
data['Total Cases'] = data['Total Cases'].fillna(0).astype('int64') ##########
lalala = pd.DataFrame()
for cap in list(range(round(data['Total Cases'].tail(1).values[0],-2), 10000, 100)):
df = data.copy()
df['cap'] = cap
df.columns = ['ds', 'y', 'cap']
# df.columns = ['ds', 'y']
df
# STEP 1: Model Fitting
m = Prophet(growth="logistic", interval_width=0.90)
m.fit(df)
# STEP 2: Specify the number of days to extend into the future.
future = m.make_future_dataframe(periods=14)
future['cap'] = df['cap'].iloc[0]
# STEP 3: Predict the future
forecast = m.predict(future)
from sklearn.metrics import mean_squared_error
# Actual data (Y) on Last 30 days
Y_true = data['Total Cases'][-30:].tolist() # Y_true = Y (original values)
# Calculated values (Yhat) on the last 30 days
Y_pred = forecast['yhat'][:-14][-30:].tolist() # Y_pred = Y'
# Calculation of Mean Squared Error (MSE)
mse = mean_squared_error(Y_true,Y_pred)
lalala = lalala.append(pd.DataFrame([[cap, mse]], columns=['Capacity','Mean Square Error']))
# fig = px.line(lalala, x='Capacity', y='Mean Square Error')
# smallest_mse = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True,
# arrowhead=2, xanchor='center', yanchor='bottom',
# ax=0, ay=-100,
# x=smallest_mse, y=lalala['Mean Square Error'].min(),
# text="Smallest MSE : <br> Capacity = " + str(smallest_mse), font_size=18, font_color = 'blue'))
# fig.update_layout(title="MSE (Based on the last 30 days) for Each Capacity")
# fig.show()
# STEP 0: Capacity computed based on MSE
df = data.copy()
df['cap'] = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
df.columns = ['ds', 'y', 'cap']
# df.columns = ['ds', 'y']
df
# STEP 1: Model Fitting
m = Prophet(growth="logistic", interval_width=0.90)
m.fit(df)
# STEP 2: Specify the number of days to extend into the future.
future = m.make_future_dataframe(periods=30)
future['cap'] = df['cap'].iloc[0]
# STEP 3: Predict the future
forecast = m.predict(future)
from fbprophet.plot import plot_plotly
import plotly.offline as py
py.init_notebook_mode()
fig = plot_plotly(m, forecast, xlabel="Date", ylabel="Case Count", figsize=(1000, 700))
fig.update_layout(title="PH Forecast (Next 30 days) on Recovered Cases", hoverlabel=dict(font_size=20))
fig.update_traces(line=dict(color='green'))
fig.show()
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
metric_df = forecast.set_index('ds')[['yhat']].join(df.set_index('ds').y).reset_index()
metric_df.dropna(inplace=True)
print("------------ Accuracy ------------")
print("R-squared: ", round(r2_score(metric_df.y, metric_df.yhat),4)*100, '%')
print("Mean Squared Error: ", mean_squared_error(metric_df.y, metric_df.yhat))
print("Mean Absolute Error: ", mean_absolute_error(metric_df.y, metric_df.yhat))